15
15
#include " AMDGPU.h"
16
16
#include " AMDGPUSubtarget.h"
17
17
#include " GCNHazardRecognizer.h"
18
+ #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
18
19
#include " SIDefines.h"
19
20
#include " SIMachineFunctionInfo.h"
20
21
#include " SIRegisterInfo.h"
21
- #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
22
22
#include " Utils/AMDGPUBaseInfo.h"
23
23
#include " llvm/ADT/APInt.h"
24
24
#include " llvm/ADT/ArrayRef.h"
28
28
#include " llvm/Analysis/AliasAnalysis.h"
29
29
#include " llvm/Analysis/MemoryLocation.h"
30
30
#include " llvm/Analysis/ValueTracking.h"
31
+ #include " llvm/CodeGen/LiveVariables.h"
31
32
#include " llvm/CodeGen/MachineBasicBlock.h"
32
33
#include " llvm/CodeGen/MachineDominators.h"
33
34
#include " llvm/CodeGen/MachineFrameInfo.h"
@@ -2841,6 +2842,18 @@ static int64_t getFoldableImm(const MachineOperand* MO) {
2841
2842
return AMDGPU::NoRegister;
2842
2843
}
2843
2844
2845
+ static void updateLiveVariables (LiveVariables *LV, MachineInstr &MI,
2846
+ MachineInstr &NewMI) {
2847
+ if (LV) {
2848
+ unsigned NumOps = MI.getNumOperands ();
2849
+ for (unsigned I = 1 ; I < NumOps; ++I) {
2850
+ MachineOperand &Op = MI.getOperand (I);
2851
+ if (Op.isReg () && Op.isKill ())
2852
+ LV->replaceKillInstruction (Op.getReg (), MI, NewMI);
2853
+ }
2854
+ }
2855
+ }
2856
+
2844
2857
MachineInstr *SIInstrInfo::convertToThreeAddress (MachineFunction::iterator &MBB,
2845
2858
MachineInstr &MI,
2846
2859
LiveVariables *LV) const {
@@ -2888,43 +2901,53 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
2888
2901
const MachineOperand *Src2 = getNamedOperand (MI, AMDGPU::OpName::src2);
2889
2902
const MachineOperand *Clamp = getNamedOperand (MI, AMDGPU::OpName::clamp);
2890
2903
const MachineOperand *Omod = getNamedOperand (MI, AMDGPU::OpName::omod);
2904
+ MachineInstrBuilder MIB;
2891
2905
2892
2906
if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
2893
2907
// If we have an SGPR input, we will violate the constant bus restriction.
2894
- (ST.getConstantBusLimit (Opc) > 1 ||
2895
- !Src0->isReg () ||
2908
+ (ST.getConstantBusLimit (Opc) > 1 || !Src0->isReg () ||
2896
2909
!RI.isSGPRReg (MBB->getParent ()->getRegInfo (), Src0->getReg ()))) {
2897
2910
if (auto Imm = getFoldableImm (Src2)) {
2898
2911
unsigned NewOpc =
2899
- IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
2900
- : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
2901
- if (pseudoToMCOpcode (NewOpc) != -1 )
2902
- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2903
- .add (*Dst)
2904
- .add (*Src0)
2905
- .add (*Src1)
2906
- .addImm (Imm);
2907
- }
2908
- unsigned NewOpc =
2909
- IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
2910
- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
2912
+ IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
2913
+ : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
2914
+ if (pseudoToMCOpcode (NewOpc) != -1 ) {
2915
+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2916
+ .add (*Dst)
2917
+ .add (*Src0)
2918
+ .add (*Src1)
2919
+ .addImm (Imm);
2920
+ updateLiveVariables (LV, MI, *MIB);
2921
+ return MIB;
2922
+ }
2923
+ }
2924
+ unsigned NewOpc = IsFMA
2925
+ ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
2926
+ : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
2911
2927
if (auto Imm = getFoldableImm (Src1)) {
2912
- if (pseudoToMCOpcode (NewOpc) != -1 )
2913
- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2914
- .add (*Dst)
2915
- .add (*Src0)
2916
- .addImm (Imm)
2917
- .add (*Src2);
2928
+ if (pseudoToMCOpcode (NewOpc) != -1 ) {
2929
+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2930
+ .add (*Dst)
2931
+ .add (*Src0)
2932
+ .addImm (Imm)
2933
+ .add (*Src2);
2934
+ updateLiveVariables (LV, MI, *MIB);
2935
+ return MIB;
2936
+ }
2918
2937
}
2919
2938
if (auto Imm = getFoldableImm (Src0)) {
2920
2939
if (pseudoToMCOpcode (NewOpc) != -1 &&
2921
- isOperandLegal (MI, AMDGPU::getNamedOperandIdx (NewOpc,
2922
- AMDGPU::OpName::src0), Src1))
2923
- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2924
- .add (*Dst)
2925
- .add (*Src1)
2926
- .addImm (Imm)
2927
- .add (*Src2);
2940
+ isOperandLegal (
2941
+ MI, AMDGPU::getNamedOperandIdx (NewOpc, AMDGPU::OpName::src0),
2942
+ Src1)) {
2943
+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2944
+ .add (*Dst)
2945
+ .add (*Src1)
2946
+ .addImm (Imm)
2947
+ .add (*Src2);
2948
+ updateLiveVariables (LV, MI, *MIB);
2949
+ return MIB;
2950
+ }
2928
2951
}
2929
2952
}
2930
2953
@@ -2933,16 +2956,18 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
2933
2956
if (pseudoToMCOpcode (NewOpc) == -1 )
2934
2957
return nullptr ;
2935
2958
2936
- return BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2937
- .add (*Dst)
2938
- .addImm (Src0Mods ? Src0Mods->getImm () : 0 )
2939
- .add (*Src0)
2940
- .addImm (Src1Mods ? Src1Mods->getImm () : 0 )
2941
- .add (*Src1)
2942
- .addImm (0 ) // Src mods
2943
- .add (*Src2)
2944
- .addImm (Clamp ? Clamp->getImm () : 0 )
2945
- .addImm (Omod ? Omod->getImm () : 0 );
2959
+ MIB = BuildMI (*MBB, MI, MI.getDebugLoc (), get (NewOpc))
2960
+ .add (*Dst)
2961
+ .addImm (Src0Mods ? Src0Mods->getImm () : 0 )
2962
+ .add (*Src0)
2963
+ .addImm (Src1Mods ? Src1Mods->getImm () : 0 )
2964
+ .add (*Src1)
2965
+ .addImm (0 ) // Src mods
2966
+ .add (*Src2)
2967
+ .addImm (Clamp ? Clamp->getImm () : 0 )
2968
+ .addImm (Omod ? Omod->getImm () : 0 );
2969
+ updateLiveVariables (LV, MI, *MIB);
2970
+ return MIB;
2946
2971
}
2947
2972
2948
2973
// It's not generally safe to move VALU instructions across these since it will
0 commit comments