Skip to content

Commit a09ca75

Browse files
authored
[AMDGPU][WaveTransform] Update GCNLaneMaskUtils to make it use AMDGPULaneMaskConstants (#473)
This patch makes GCNLaneMaskUtils use the LaneMaskConstants defined by the globally used AMDGPULaneMaskUtils instead of GCNLaneMaskConstants, eliminating the code duplication.
1 parent 63f7cb1 commit a09ca75

File tree

4 files changed

+63
-114
lines changed

4 files changed

+63
-114
lines changed

llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,12 @@ class LaneMaskConstants {
3636
const unsigned MovTermOpc;
3737
const unsigned OrOpc;
3838
const unsigned OrTermOpc;
39+
const unsigned OrN2Opc;
3940
const unsigned OrSaveExecOpc;
4041
const unsigned XorOpc;
4142
const unsigned XorTermOpc;
4243
const unsigned WQMOpc;
44+
const TargetRegisterClass *LaneMaskRC;
4345

4446
constexpr LaneMaskConstants(bool IsWave32)
4547
: ExecReg(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC),
@@ -62,11 +64,13 @@ class LaneMaskConstants {
6264
MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
6365
OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
6466
OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
67+
OrN2Opc(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
6568
OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
6669
: AMDGPU::S_OR_SAVEEXEC_B64),
6770
XorOpc(IsWave32 ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64),
6871
XorTermOpc(IsWave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term),
69-
WQMOpc(IsWave32 ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64) {}
72+
WQMOpc(IsWave32 ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
73+
LaneMaskRC(IsWave32 ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass) {}
7074

7175
static inline const LaneMaskConstants &get(const GCNSubtarget &ST);
7276
};

llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1785,13 +1785,14 @@ void ControlFlowRewriter::prepareWaveCfg() {
17851785
/// manipulation.
17861786
void ControlFlowRewriter::rewrite() {
17871787
GCNLaneMaskAnalysis LMA(Function);
1788+
const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts();
17881789

17891790
Register RegAllOnes;
17901791
auto getAllOnes = [&]() {
17911792
if (!RegAllOnes) {
17921793
RegAllOnes = LMU.createLaneMaskReg();
17931794
BuildMI(Function.front(), Function.front().getFirstTerminator(), {},
1794-
TII.get(LMU.consts().OpMov), RegAllOnes)
1795+
TII.get(LMC.MovOpc), RegAllOnes)
17951796
.addImm(-1);
17961797
}
17971798
return RegAllOnes;
@@ -1841,12 +1842,12 @@ void ControlFlowRewriter::rewrite() {
18411842
if (!LMA.isSubsetOfExec(CondReg, *Node->Block)) {
18421843
CondReg = LMU.createLaneMaskReg();
18431844
BuildMI(*Node->Block, Node->Block->end(), {},
1844-
TII.get(LMU.consts().OpAnd), CondReg)
1845-
.addReg(LMU.consts().RegExec)
1845+
TII.get(LMC.AndOpc), CondReg)
1846+
.addReg(LMC.ExecReg)
18461847
.addReg(Info.OrigCondition);
18471848
}
18481849
BuildMI(*Node->Block, Node->Block->end(), {}, TII.get(AMDGPU::COPY),
1849-
LMU.consts().RegVcc)
1850+
LMC.VccReg)
18501851
.addReg(CondReg);
18511852

18521853
Opcode = AMDGPU::S_CBRANCH_VCCNZ;
@@ -1924,15 +1925,15 @@ void ControlFlowRewriter::rewrite() {
19241925
if (!LaneOrigin.InvertCondition) {
19251926
BuildMI(*LaneOrigin.Node->Block,
19261927
LaneOrigin.Node->Block->getFirstTerminator(), {},
1927-
TII.get(LMU.consts().OpCSelect), CondReg)
1928-
.addReg(LMU.consts().RegExec)
1928+
TII.get(LMC.CSelectOpc), CondReg)
1929+
.addReg(LMC.ExecReg)
19291930
.addImm(0);
19301931
} else {
19311932
BuildMI(*LaneOrigin.Node->Block,
19321933
LaneOrigin.Node->Block->getFirstTerminator(), {},
1933-
TII.get(LMU.consts().OpCSelect), CondReg)
1934+
TII.get(LMC.CSelectOpc), CondReg)
19341935
.addImm(0)
1935-
.addReg(LMU.consts().RegExec);
1936+
.addReg(LMC.ExecReg);
19361937
}
19371938
} else {
19381939
CondReg = LaneOrigin.CondReg;
@@ -1941,8 +1942,8 @@ void ControlFlowRewriter::rewrite() {
19411942
CondReg = LMU.createLaneMaskReg();
19421943
BuildMI(*LaneOrigin.Node->Block,
19431944
LaneOrigin.Node->Block->getFirstTerminator(), {},
1944-
TII.get(LMU.consts().OpAnd), CondReg)
1945-
.addReg(LMU.consts().RegExec)
1945+
TII.get(LMC.AndOpc), CondReg)
1946+
.addReg(LMC.ExecReg)
19461947
.addReg(Prev);
19471948

19481949
RegMap[std::make_pair(LaneOrigin.Node->Block, LaneOrigin.CondReg)]
@@ -1962,7 +1963,7 @@ void ControlFlowRewriter::rewrite() {
19621963
CondReg = LMU.createLaneMaskReg();
19631964
BuildMI(*LaneOrigin.Node->Block,
19641965
LaneOrigin.Node->Block->getFirstTerminator(), {},
1965-
TII.get(LMU.consts().OpXor), CondReg)
1966+
TII.get(LMC.XorOpc), CondReg)
19661967
.addReg(LaneOrigin.CondReg)
19671968
.addImm(-1);
19681969

@@ -1999,7 +2000,7 @@ void ControlFlowRewriter::rewrite() {
19992000
<< '\n');
20002001

20012002
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
2002-
TII.get(LMU.consts().OpMovTerm), LMU.consts().RegExec)
2003+
TII.get(LMC.MovTermOpc), LMC.ExecReg)
20032004
.addReg(OriginCFGNodeInfo.PrimarySuccessorExec);
20042005
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
20052006
TII.get(AMDGPU::SI_WAVE_CF_EDGE));
@@ -2046,12 +2047,12 @@ void ControlFlowRewriter::rewrite() {
20462047
Register Rejoin;
20472048

20482049
if (PrimaryExecDef->getParent() == Pred->Block &&
2049-
PrimaryExecDef->getOpcode() == LMU.consts().OpXor &&
2050+
PrimaryExecDef->getOpcode() == LMC.XorOpc &&
20502051
PrimaryExecDef->getOperand(1).isReg() &&
20512052
PrimaryExecDef->getOperand(2).isReg()) {
2052-
if (PrimaryExecDef->getOperand(1).getReg() == LMU.consts().RegExec)
2053+
if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
20532054
Rejoin = PrimaryExecDef->getOperand(2).getReg();
2054-
else if (PrimaryExecDef->getOperand(2).getReg() == LMU.consts().RegExec)
2055+
else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
20552056
Rejoin = PrimaryExecDef->getOperand(1).getReg();
20562057
}
20572058

@@ -2069,8 +2070,8 @@ void ControlFlowRewriter::rewrite() {
20692070
if (!Rejoin) {
20702071
Rejoin = LMU.createLaneMaskReg();
20712072
BuildMI(*Pred->Block, Pred->Block->getFirstTerminator(), {},
2072-
TII.get(LMU.consts().OpXor), Rejoin)
2073-
.addReg(LMU.consts().RegExec)
2073+
TII.get(LMC.XorOpc), Rejoin)
2074+
.addReg(LMC.ExecReg)
20742075
.addReg(PrimaryExec);
20752076
}
20762077

@@ -2084,8 +2085,8 @@ void ControlFlowRewriter::rewrite() {
20842085

20852086
Register Rejoin = Updater.getValueInMiddleOfBlock(*Secondary->Block);
20862087
BuildMI(*Secondary->Block, Secondary->Block->getFirstNonPHI(), {},
2087-
TII.get(LMU.consts().OpOr), LMU.consts().RegExec)
2088-
.addReg(LMU.consts().RegExec)
2088+
TII.get(LMC.OrOpc), LMC.ExecReg)
2089+
.addReg(LMC.ExecReg)
20892090
.addReg(Rejoin);
20902091

20912092
LLVM_DEBUG(Function.dump());
@@ -2136,7 +2137,6 @@ class AMDGPUWaveTransform : public MachineFunctionPass {
21362137
MachineDominatorTree *DomTree = nullptr;
21372138
// MachineConvergenceInfo ConvergenceInfo;
21382139
MachineCycleInfo *CycleInfo;
2139-
GCNLaneMaskUtils LMU;
21402140
const SIInstrInfo *TII;
21412141
};
21422142

@@ -2163,7 +2163,6 @@ bool AMDGPUWaveTransform::runOnMachineFunction(MachineFunction &MF) {
21632163
LLVM_DEBUG(dbgs() << "AMDGPU Wave Transformnsform: " << MF.getName() << '\n');
21642164

21652165
DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2166-
LMU.setFunction(MF);
21672166
TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
21682167

21692168
// ConvergenceInfo = computeMachineConvergenceInfo(MF, *DomTree);

llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp

Lines changed: 28 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -16,47 +16,13 @@
1616

1717
using namespace llvm;
1818

19-
/// Obtain a reference to the global wavefront-size dependent constants
20-
/// based on \p wavefrontSize.
21-
const GCNLaneMaskConstants *
22-
GCNLaneMaskUtils::getConsts(unsigned WavefrontSize) {
23-
static const GCNLaneMaskConstants Wave32 = {
24-
AMDGPU::EXEC_LO, AMDGPU::VCC_LO, &AMDGPU::SReg_32RegClass,
25-
AMDGPU::S_MOV_B32, AMDGPU::S_MOV_B32_term, AMDGPU::S_AND_B32,
26-
AMDGPU::S_OR_B32, AMDGPU::S_XOR_B32, AMDGPU::S_ANDN2_B32,
27-
AMDGPU::S_ORN2_B32, AMDGPU::S_CSELECT_B32,
28-
};
29-
static const GCNLaneMaskConstants Wave64 = {
30-
AMDGPU::EXEC,
31-
AMDGPU::VCC,
32-
&AMDGPU::SReg_64RegClass,
33-
AMDGPU::S_MOV_B64,
34-
AMDGPU::S_MOV_B64_term,
35-
AMDGPU::S_AND_B64,
36-
AMDGPU::S_OR_B64,
37-
AMDGPU::S_XOR_B64,
38-
AMDGPU::S_ANDN2_B64,
39-
AMDGPU::S_ORN2_B64,
40-
AMDGPU::S_CSELECT_B64,
41-
};
42-
assert(WavefrontSize == 32 || WavefrontSize == 64);
43-
return WavefrontSize == 32 ? &Wave32 : &Wave64;
44-
}
45-
46-
/// Obtain a reference to the global wavefront-size dependent constants
47-
/// based on the wavefront-size of \p function.
48-
const GCNLaneMaskConstants *GCNLaneMaskUtils::getConsts(MachineFunction &MF) {
49-
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
50-
return getConsts(ST.getWavefrontSize());
51-
}
52-
5319
/// Check whether the register could be a lane-mask register.
5420
///
5521
/// It does not distinguish between lane-masks and scalar registers that happen
5622
/// to have the right bitsize.
5723
bool GCNLaneMaskUtils::maybeLaneMask(Register Reg) const {
58-
MachineRegisterInfo &MRI = MF->getRegInfo();
59-
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
24+
MachineRegisterInfo &MRI = MF.getRegInfo();
25+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
6026
const SIInstrInfo *TII = ST.getInstrInfo();
6127
return TII->getRegisterInfo().isSGPRReg(MRI, Reg) &&
6228
TII->getRegisterInfo().getRegSizeInBits(Reg, MRI) ==
@@ -66,7 +32,7 @@ bool GCNLaneMaskUtils::maybeLaneMask(Register Reg) const {
6632
/// Determine whether the lane-mask register \p Reg is a wave-wide constant.
6733
/// If so, the value is stored in \p Val.
6834
bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
69-
MachineRegisterInfo &MRI = MF->getRegInfo();
35+
MachineRegisterInfo &MRI = MF.getRegInfo();
7036

7137
const MachineInstr *MI;
7238
for (;;) {
@@ -90,7 +56,7 @@ bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
9056
return false;
9157
}
9258

93-
if (MI->getOpcode() != Constants->OpMov)
59+
if (MI->getOpcode() != LMC.MovOpc)
9460
return false;
9561

9662
if (!MI->getOperand(1).isImm())
@@ -111,8 +77,8 @@ bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
11177

11278
/// Create a virtual lanemask register.
11379
Register GCNLaneMaskUtils::createLaneMaskReg() const {
114-
MachineRegisterInfo &MRI = MF->getRegInfo();
115-
return MRI.createVirtualRegister(Constants->RegClass);
80+
MachineRegisterInfo &MRI = MF.getRegInfo();
81+
return MRI.createVirtualRegister(LMC.LaneMaskRC);
11682
}
11783

11884
/// Insert the moral equivalent of
@@ -140,7 +106,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
140106
Register PrevReg, Register CurReg,
141107
GCNLaneMaskAnalysis *LMA,
142108
bool accumulating) const {
143-
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
109+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
144110
const SIInstrInfo *TII = ST.getInstrInfo();
145111
bool PrevVal = false;
146112
bool PrevConstant = !PrevReg || isConstantLaneMask(PrevReg, PrevVal);
@@ -155,10 +121,10 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
155121
} else if (CurVal) {
156122
// If PrevReg is undef, prefer to propagate a full constant.
157123
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
158-
.addReg(PrevReg ? Constants->RegExec : CurReg);
124+
.addReg(PrevReg ? LMC.ExecReg : CurReg);
159125
} else {
160-
BuildMI(MBB, I, DL, TII->get(Constants->OpXor), DstReg)
161-
.addReg(Constants->RegExec)
126+
BuildMI(MBB, I, DL, TII->get(LMC.XorOpc), DstReg)
127+
.addReg(LMC.ExecReg)
162128
.addImm(-1);
163129
}
164130
return;
@@ -174,9 +140,9 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
174140
} else {
175141
PrevMaskedReg = createLaneMaskReg();
176142
PrevMaskedBuilt =
177-
BuildMI(MBB, I, DL, TII->get(Constants->OpAndN2), PrevMaskedReg)
143+
BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
178144
.addReg(PrevReg)
179-
.addReg(Constants->RegExec);
145+
.addReg(LMC.ExecReg);
180146
}
181147
}
182148
if (!CurConstant) {
@@ -186,9 +152,9 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
186152
} else {
187153
CurMaskedReg = createLaneMaskReg();
188154
CurMaskedBuilt =
189-
BuildMI(MBB, I, DL, TII->get(Constants->OpAnd), CurMaskedReg)
155+
BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CurMaskedReg)
190156
.addReg(CurReg)
191-
.addReg(Constants->RegExec);
157+
.addReg(LMC.ExecReg);
192158
}
193159
}
194160

@@ -208,13 +174,13 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
208174
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(PrevMaskedReg);
209175
}
210176
} else if (PrevConstant && PrevVal) {
211-
BuildMI(MBB, I, DL, TII->get(Constants->OpOrN2), DstReg)
177+
BuildMI(MBB, I, DL, TII->get(LMC.OrN2Opc), DstReg)
212178
.addReg(CurMaskedReg)
213-
.addReg(Constants->RegExec);
179+
.addReg(LMC.ExecReg);
214180
} else {
215-
BuildMI(MBB, I, DL, TII->get(Constants->OpOr), DstReg)
181+
BuildMI(MBB, I, DL, TII->get(LMC.OrOpc), DstReg)
216182
.addReg(PrevMaskedReg)
217-
.addReg(CurMaskedReg ? CurMaskedReg : Constants->RegExec);
183+
.addReg(CurMaskedReg ? CurMaskedReg : LMC.ExecReg);
218184
}
219185
}
220186

@@ -226,10 +192,11 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
226192
unsigned RemainingDepth) {
227193
MachineRegisterInfo &MRI = LMU.function()->getRegInfo();
228194
MachineInstr *DefInstr = nullptr;
195+
const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts();
229196

230197
for (;;) {
231198
if (!Register::isVirtualRegister(Reg)) {
232-
if (Reg == LMU.consts().RegExec &&
199+
if (Reg == LMC.ExecReg &&
233200
(!DefInstr || DefInstr->getParent() == &UseBlock))
234201
return true;
235202
return false;
@@ -241,7 +208,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
241208
continue;
242209
}
243210

244-
if (DefInstr->getOpcode() == LMU.consts().OpMov) {
211+
if (DefInstr->getOpcode() == LMC.MovOpc) {
245212
if (DefInstr->getOperand(1).isImm() &&
246213
DefInstr->getOperand(1).getImm() == 0)
247214
return true;
@@ -268,11 +235,11 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
268235
if (!RemainingDepth--)
269236
return false;
270237

271-
bool LikeOr = DefInstr->getOpcode() == LMU.consts().OpOr ||
272-
DefInstr->getOpcode() == LMU.consts().OpXor ||
273-
DefInstr->getOpcode() == LMU.consts().OpCSelect;
274-
bool IsAnd = DefInstr->getOpcode() == LMU.consts().OpAnd;
275-
bool IsAndN2 = DefInstr->getOpcode() == LMU.consts().OpAndN2;
238+
bool LikeOr = DefInstr->getOpcode() == LMC.OrOpc ||
239+
DefInstr->getOpcode() == LMC.XorOpc ||
240+
DefInstr->getOpcode() == LMC.CSelectOpc;
241+
bool IsAnd = DefInstr->getOpcode() == LMC.AndOpc;
242+
bool IsAndN2 = DefInstr->getOpcode() == LMC.AndN2Opc;
276243
if ((LikeOr || IsAnd || IsAndN2) &&
277244
(DefInstr->getOperand(1).isReg() && DefInstr->getOperand(2).isReg())) {
278245
bool FirstIsSubset = isSubsetOfExec(DefInstr->getOperand(1).getReg(),
@@ -301,7 +268,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
301268
void GCNLaneMaskUpdater::init(Register Reg) {
302269
Processed = false;
303270
Blocks.clear();
304-
//SSAUpdater.Initialize(LMU.consts().RegClass);
271+
//SSAUpdater.Initialize(LMU.getLaneMaskConsts().LaneMaskRC);
305272
SSAUpdater.Initialize(Reg);
306273
}
307274

@@ -451,7 +418,7 @@ void GCNLaneMaskUpdater::process() {
451418
// Prepare an all-zero value for the default and reset in accumulating mode.
452419
if (Accumulating && !ZeroReg) {
453420
ZeroReg = LMU.createLaneMaskReg();
454-
BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.consts().OpMov),
421+
BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.getLaneMaskConsts().MovOpc),
455422
ZeroReg)
456423
.addImm(0);
457424
}

0 commit comments

Comments
 (0)