@@ -3602,6 +3602,26 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
36023602 }
36033603
36043604 switch (MI.getOpcode ()) {
3605+ case AMDGPU::S_UADDO_PSEUDO:
3606+ case AMDGPU::S_USUBO_PSEUDO: {
3607+ const DebugLoc &DL = MI.getDebugLoc ();
3608+ MachineOperand &Dest0 = MI.getOperand (0 );
3609+ MachineOperand &Dest1 = MI.getOperand (1 );
3610+ MachineOperand &Src0 = MI.getOperand (2 );
3611+ MachineOperand &Src1 = MI.getOperand (3 );
3612+
3613+ unsigned Opc = (MI.getOpcode () == AMDGPU::S_UADDO_PSEUDO)
3614+ ? AMDGPU::S_ADD_I32
3615+ : AMDGPU::S_SUB_I32;
3616+ BuildMI (*BB, MI, DL, TII->get (Opc), Dest0.getReg ()).add (Src0).add (Src1);
3617+
3618+ BuildMI (*BB, MI, DL, TII->get (AMDGPU::S_CSELECT_B64), Dest1.getReg ())
3619+ .addImm (1 )
3620+ .addImm (0 );
3621+
3622+ MI.eraseFromParent ();
3623+ return BB;
3624+ }
36053625 case AMDGPU::S_ADD_U64_PSEUDO:
36063626 case AMDGPU::S_SUB_U64_PSEUDO: {
36073627 MachineRegisterInfo &MRI = BB->getParent ()->getRegInfo ();
@@ -3617,35 +3637,146 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
36173637 Register DestSub0 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
36183638 Register DestSub1 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
36193639
3620- MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm (MI, MRI,
3621- Src0, BoolRC, AMDGPU::sub0,
3622- &AMDGPU::SReg_32RegClass);
3623- MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm (MI, MRI,
3624- Src0, BoolRC, AMDGPU::sub1,
3625- &AMDGPU::SReg_32RegClass);
3640+ MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm (
3641+ MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
3642+ MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm (
3643+ MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
36263644
3627- MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm (MI, MRI,
3628- Src1, BoolRC, AMDGPU::sub0,
3629- &AMDGPU::SReg_32RegClass);
3630- MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm (MI, MRI,
3631- Src1, BoolRC, AMDGPU::sub1,
3632- &AMDGPU::SReg_32RegClass);
3645+ MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm (
3646+ MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
3647+ MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm (
3648+ MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
36333649
36343650 bool IsAdd = (MI.getOpcode () == AMDGPU::S_ADD_U64_PSEUDO);
36353651
36363652 unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
36373653 unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
3638- BuildMI (*BB, MI, DL, TII->get (LoOpc), DestSub0)
3639- .add (Src0Sub0)
3640- .add (Src1Sub0);
3641- BuildMI (*BB, MI, DL, TII->get (HiOpc), DestSub1)
3642- .add (Src0Sub1)
3643- .add (Src1Sub1);
3654+ BuildMI (*BB, MI, DL, TII->get (LoOpc), DestSub0).add (Src0Sub0).add (Src1Sub0);
3655+ BuildMI (*BB, MI, DL, TII->get (HiOpc), DestSub1).add (Src0Sub1).add (Src1Sub1);
36443656 BuildMI (*BB, MI, DL, TII->get (TargetOpcode::REG_SEQUENCE), Dest.getReg ())
3645- .addReg (DestSub0)
3646- .addImm (AMDGPU::sub0)
3647- .addReg (DestSub1)
3648- .addImm (AMDGPU::sub1);
3657+ .addReg (DestSub0)
3658+ .addImm (AMDGPU::sub0)
3659+ .addReg (DestSub1)
3660+ .addImm (AMDGPU::sub1);
3661+ MI.eraseFromParent ();
3662+ return BB;
3663+ }
3664+ case AMDGPU::V_ADD_U64_PSEUDO:
3665+ case AMDGPU::V_SUB_U64_PSEUDO: {
3666+ MachineRegisterInfo &MRI = BB->getParent ()->getRegInfo ();
3667+ const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
3668+ const SIRegisterInfo *TRI = ST.getRegisterInfo ();
3669+ const DebugLoc &DL = MI.getDebugLoc ();
3670+
3671+ bool IsAdd = (MI.getOpcode () == AMDGPU::V_ADD_U64_PSEUDO);
3672+
3673+ const auto *CarryRC = TRI->getRegClass (AMDGPU::SReg_1_XEXECRegClassID);
3674+
3675+ Register DestSub0 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
3676+ Register DestSub1 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
3677+
3678+ Register CarryReg = MRI.createVirtualRegister (CarryRC);
3679+ Register DeadCarryReg = MRI.createVirtualRegister (CarryRC);
3680+
3681+ MachineOperand &Dest = MI.getOperand (0 );
3682+ MachineOperand &Src0 = MI.getOperand (1 );
3683+ MachineOperand &Src1 = MI.getOperand (2 );
3684+
3685+ const TargetRegisterClass *Src0RC = Src0.isReg ()
3686+ ? MRI.getRegClass (Src0.getReg ())
3687+ : &AMDGPU::VReg_64RegClass;
3688+ const TargetRegisterClass *Src1RC = Src1.isReg ()
3689+ ? MRI.getRegClass (Src1.getReg ())
3690+ : &AMDGPU::VReg_64RegClass;
3691+
3692+ const TargetRegisterClass *Src0SubRC =
3693+ TRI->getSubRegClass (Src0RC, AMDGPU::sub0);
3694+ const TargetRegisterClass *Src1SubRC =
3695+ TRI->getSubRegClass (Src1RC, AMDGPU::sub1);
3696+
3697+ MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm (
3698+ MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
3699+ MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm (
3700+ MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
3701+
3702+ MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm (
3703+ MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
3704+ MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm (
3705+ MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
3706+
3707+ unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
3708+ MachineInstr *LoHalf = BuildMI (*BB, MI, DL, TII->get (LoOpc), DestSub0)
3709+ .addReg (CarryReg, RegState::Define)
3710+ .add (SrcReg0Sub0)
3711+ .add (SrcReg1Sub0)
3712+ .addImm (0 ); // clamp bit
3713+
3714+ unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
3715+ MachineInstr *HiHalf =
3716+ BuildMI (*BB, MI, DL, TII->get (HiOpc), DestSub1)
3717+ .addReg (DeadCarryReg, RegState::Define | RegState::Dead)
3718+ .add (SrcReg0Sub1)
3719+ .add (SrcReg1Sub1)
3720+ .addReg (CarryReg, RegState::Kill)
3721+ .addImm (0 ); // clamp bit
3722+
3723+ BuildMI (*BB, MI, DL, TII->get (TargetOpcode::REG_SEQUENCE), Dest.getReg ())
3724+ .addReg (DestSub0)
3725+ .addImm (AMDGPU::sub0)
3726+ .addReg (DestSub1)
3727+ .addImm (AMDGPU::sub1);
3728+ TII->legalizeOperands (*LoHalf);
3729+ TII->legalizeOperands (*HiHalf);
3730+ MI.eraseFromParent ();
3731+ return BB;
3732+ }
3733+ case AMDGPU::S_ADD_CO_PSEUDO:
3734+ case AMDGPU::S_SUB_CO_PSEUDO: {
3735+ // This pseudo can only be selected from a uniform
3736+ // add/subcarry node, so any VGPR operands are
3737+ // assumed to be splat vectors.
3738+ MachineRegisterInfo &MRI = BB->getParent ()->getRegInfo ();
3739+ const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
3740+ const SIRegisterInfo *TRI = ST.getRegisterInfo ();
3741+ MachineBasicBlock::iterator MII = MI;
3742+ const DebugLoc &DL = MI.getDebugLoc ();
3743+ MachineOperand &Dest = MI.getOperand (0 );
3744+ MachineOperand &Src0 = MI.getOperand (2 );
3745+ MachineOperand &Src1 = MI.getOperand (3 );
3746+ MachineOperand &Src2 = MI.getOperand (4 );
3747+ unsigned Opc = (MI.getOpcode () == AMDGPU::S_ADD_CO_PSEUDO)
3748+ ? AMDGPU::S_ADDC_U32
3749+ : AMDGPU::S_SUBB_U32;
3750+ if (Src0.isReg () && TRI->isVectorRegister (MRI, Src0.getReg ())) {
3751+ Register RegOp0 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3752+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), RegOp0)
3753+ .addReg (Src0.getReg ());
3754+ Src0.setReg (RegOp0);
3755+ }
3756+ if (Src1.isReg () && TRI->isVectorRegister (MRI, Src1.getReg ())) {
3757+ Register RegOp1 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3758+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), RegOp1)
3759+ .addReg (Src1.getReg ());
3760+ Src1.setReg (RegOp1);
3761+ }
3762+ Register RegOp2 = MRI.createVirtualRegister (&AMDGPU::SReg_32RegClass);
3763+ if (TRI->isVectorRegister (MRI, Src2.getReg ())) {
3764+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), RegOp2)
3765+ .addReg (Src2.getReg ());
3766+ Src2.setReg (RegOp2);
3767+ }
3768+
3769+ if (TRI->getRegSizeInBits (*MRI.getRegClass (Src2.getReg ())) == 64 ) {
3770+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::S_CMP_LG_U64))
3771+ .addReg (Src2.getReg ())
3772+ .addImm (0 );
3773+ } else {
3774+ BuildMI (*BB, MII, DL, TII->get (AMDGPU::S_CMPK_LG_U32))
3775+ .addReg (Src2.getReg ())
3776+ .addImm (0 );
3777+ }
3778+
3779+ BuildMI (*BB, MII, DL, TII->get (Opc), Dest.getReg ()).add (Src0).add (Src1);
36493780 MI.eraseFromParent ();
36503781 return BB;
36513782 }
0 commit comments