@@ -700,16 +700,45 @@ static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
700700// csel instruction. If so, return the folded opcode, and the replacement
701701// register.
702702static unsigned canFoldIntoCSel (const MachineRegisterInfo &MRI, unsigned VReg,
703- unsigned *NewVReg = nullptr ) {
703+ unsigned *NewReg = nullptr ) {
704704 VReg = removeCopies (MRI, VReg);
705705 if (!Register::isVirtualRegister (VReg))
706706 return 0 ;
707707
708708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq (MRI.getRegClass (VReg));
709709 const MachineInstr *DefMI = MRI.getVRegDef (VReg);
710710 unsigned Opc = 0 ;
711- unsigned SrcOpNum = 0 ;
711+ unsigned SrcReg = 0 ;
712712 switch (DefMI->getOpcode ()) {
713+ case AArch64::SUBREG_TO_REG:
714+ // Check for the following way to define a 64-bit immediate:
715+ // %0:gpr32 = MOVi32imm 1
716+ // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
717+ if (!DefMI->getOperand (1 ).isImm () || DefMI->getOperand (1 ).getImm () != 0 )
718+ return 0 ;
719+ if (!DefMI->getOperand (2 ).isReg ())
720+ return 0 ;
721+ if (!DefMI->getOperand (3 ).isImm () ||
722+ DefMI->getOperand (3 ).getImm () != AArch64::sub_32)
723+ return 0 ;
724+ DefMI = MRI.getVRegDef (DefMI->getOperand (2 ).getReg ());
725+ if (DefMI->getOpcode () != AArch64::MOVi32imm)
726+ return 0 ;
727+ if (!DefMI->getOperand (1 ).isImm () || DefMI->getOperand (1 ).getImm () != 1 )
728+ return 0 ;
729+ assert (Is64Bit);
730+ SrcReg = AArch64::XZR;
731+ Opc = AArch64::CSINCXr;
732+ break ;
733+
734+ case AArch64::MOVi32imm:
735+ case AArch64::MOVi64imm:
736+ if (!DefMI->getOperand (1 ).isImm () || DefMI->getOperand (1 ).getImm () != 1 )
737+ return 0 ;
738+ SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
739+ Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
740+ break ;
741+
713742 case AArch64::ADDSXri:
714743 case AArch64::ADDSWri:
715744 // if NZCV is used, do not fold.
@@ -724,7 +753,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
724753 if (!DefMI->getOperand (2 ).isImm () || DefMI->getOperand (2 ).getImm () != 1 ||
725754 DefMI->getOperand (3 ).getImm () != 0 )
726755 return 0 ;
727- SrcOpNum = 1 ;
756+ SrcReg = DefMI-> getOperand ( 1 ). getReg () ;
728757 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
729758 break ;
730759
@@ -734,7 +763,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
734763 unsigned ZReg = removeCopies (MRI, DefMI->getOperand (1 ).getReg ());
735764 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
736765 return 0 ;
737- SrcOpNum = 2 ;
766+ SrcReg = DefMI-> getOperand ( 2 ). getReg () ;
738767 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
739768 break ;
740769 }
@@ -753,17 +782,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
753782 unsigned ZReg = removeCopies (MRI, DefMI->getOperand (1 ).getReg ());
754783 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
755784 return 0 ;
756- SrcOpNum = 2 ;
785+ SrcReg = DefMI-> getOperand ( 2 ). getReg () ;
757786 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
758787 break ;
759788 }
760789 default :
761790 return 0 ;
762791 }
763- assert (Opc && SrcOpNum && " Missing parameters" );
792+ assert (Opc && SrcReg && " Missing parameters" );
764793
765- if (NewVReg )
766- *NewVReg = DefMI-> getOperand (SrcOpNum). getReg () ;
794+ if (NewReg )
795+ *NewReg = SrcReg ;
767796 return Opc;
768797}
769798
@@ -964,28 +993,34 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
964993
965994 // Try folding simple instructions into the csel.
966995 if (TryFold) {
967- unsigned NewVReg = 0 ;
968- unsigned FoldedOpc = canFoldIntoCSel (MRI, TrueReg, &NewVReg );
996+ unsigned NewReg = 0 ;
997+ unsigned FoldedOpc = canFoldIntoCSel (MRI, TrueReg, &NewReg );
969998 if (FoldedOpc) {
9701000 // The folded opcodes csinc, csinv and csneg apply the operation to
9711000 // FalseReg, so we need to invert the condition.
9721001 CC = AArch64CC::getInvertedCondCode (CC);
9731002 TrueReg = FalseReg;
9741003 } else
975- FoldedOpc = canFoldIntoCSel (MRI, FalseReg, &NewVReg );
1004+ FoldedOpc = canFoldIntoCSel (MRI, FalseReg, &NewReg );
9761005
9771006 // Fold the operation. Leave any dead instructions for DCE to clean up.
9781007 if (FoldedOpc) {
979- FalseReg = NewVReg ;
1008+ FalseReg = NewReg ;
9801009 Opc = FoldedOpc;
981- // The extends the live range of NewVReg .
982- MRI.clearKillFlags (NewVReg );
1010+ // Extend the live range of NewReg .
1011+ MRI.clearKillFlags (NewReg );
9831012 }
9841013 }
9851014
9861015 // Pull all virtual registers into the appropriate class.
9871016 MRI.constrainRegClass (TrueReg, RC);
988- MRI.constrainRegClass (FalseReg, RC);
1017+ // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1018+ assert (
1019+ (FalseReg.isVirtual () || FalseReg == AArch64::WZR ||
1020+ FalseReg == AArch64::XZR) &&
1021+ " FalseReg was folded into a non-virtual register other than WZR or XZR" );
1022+ if (FalseReg.isVirtual ())
1023+ MRI.constrainRegClass (FalseReg, RC);
9891024
9901025 // Insert the csel.
9911026 BuildMI (MBB, I, DL, get (Opc), DstReg)
0 commit comments