Commit 07e94c4

[AMDGPU][WaveTransform] Update GCNLaneMaskUtils to use AMDGPULaneMaskUtils

This patch makes GCNLaneMaskUtils use the LaneMaskConstants defined by the globally used AMDGPULaneMaskUtils instead of the local GCNLaneMaskConstants, eliminating the code duplication.
1 parent 30560ae commit 07e94c4
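
For orientation, here is a minimal sketch (not part of the patch) of the call path this change standardizes on. LaneMaskConstants::get and the *Opc/*Reg member names are taken from the diffs below; the surrounding pass context (MF, MBB, I, DL, TII, CondReg, OrigCond) is assumed boilerplate, not code from this commit.

    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);
    // LMC.AndOpc resolves to S_AND_B32 on wave32 and S_AND_B64 on wave64,
    // LMC.ExecReg to EXEC_LO or EXEC, so a pass no longer spells out both forms.
    BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CondReg)
        .addReg(LMC.ExecReg)
        .addReg(OrigCond);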

4 files changed: +54 -91 lines changed

llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h

Lines changed: 5 additions & 1 deletion
@@ -36,10 +36,12 @@ class LaneMaskConstants {
   const unsigned MovTermOpc;
   const unsigned OrOpc;
   const unsigned OrTermOpc;
+  const unsigned OrN2Opc;
   const unsigned OrSaveExecOpc;
   const unsigned XorOpc;
   const unsigned XorTermOpc;
   const unsigned WQMOpc;
+  const TargetRegisterClass *SRegClass;

   constexpr LaneMaskConstants(bool IsWave32)
       : ExecReg(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC),
@@ -62,11 +64,13 @@ class LaneMaskConstants {
         MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
         OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
         OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
+        OrN2Opc(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
         OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
                                : AMDGPU::S_OR_SAVEEXEC_B64),
         XorOpc(IsWave32 ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64),
         XorTermOpc(IsWave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term),
-        WQMOpc(IsWave32 ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64) {}
+        WQMOpc(IsWave32 ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+        SRegClass(IsWave32 ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass) {}

   static inline const LaneMaskConstants &get(const GCNSubtarget &ST);
 };
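
The two members added here, OrN2Opc and SRegClass, are exactly what the GCN-local constants provided and the shared struct previously lacked. A hedged fragment showing how they are consumed (mirroring createLaneMaskReg and buildMergeLaneMasks later in this diff; LMC, MRI, MBB, I, DL, TII, DstReg, and CurMasked are assumed pass-local names, not code from this commit):

    // SRegClass is &SReg_32RegClass on wave32, &SReg_64RegClass on wave64.
    Register LaneMask = MRI.createVirtualRegister(LMC.SRegClass);
    // OrN2Opc is S_ORN2_B32 / S_ORN2_B64; buildMergeLaneMasks below uses it
    // when the previous lane-mask value is a known constant true.
    BuildMI(MBB, I, DL, TII->get(LMC.OrN2Opc), DstReg)
        .addReg(CurMasked)
        .addReg(LMC.ExecReg);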

llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp

Lines changed: 19 additions & 19 deletions
@@ -1791,7 +1791,7 @@ void ControlFlowRewriter::rewrite() {
     if (!RegAllOnes) {
       RegAllOnes = LMU.createLaneMaskReg();
       BuildMI(Function.front(), Function.front().getFirstTerminator(), {},
-              TII.get(LMU.consts().OpMov), RegAllOnes)
+              TII.get(LMU.consts().MovOpc), RegAllOnes)
           .addImm(-1);
     }
     return RegAllOnes;
@@ -1841,12 +1841,12 @@ void ControlFlowRewriter::rewrite() {
       if (!LMA.isSubsetOfExec(CondReg, *Node->Block)) {
         CondReg = LMU.createLaneMaskReg();
         BuildMI(*Node->Block, Node->Block->end(), {},
-                TII.get(LMU.consts().OpAnd), CondReg)
-            .addReg(LMU.consts().RegExec)
+                TII.get(LMU.consts().AndOpc), CondReg)
+            .addReg(LMU.consts().ExecReg)
             .addReg(Info.OrigCondition);
       }
       BuildMI(*Node->Block, Node->Block->end(), {}, TII.get(AMDGPU::COPY),
-              LMU.consts().RegVcc)
+              LMU.consts().VccReg)
           .addReg(CondReg);

       Opcode = AMDGPU::S_CBRANCH_VCCNZ;
@@ -1924,15 +1924,15 @@ void ControlFlowRewriter::rewrite() {
       if (!LaneOrigin.InvertCondition) {
         BuildMI(*LaneOrigin.Node->Block,
                 LaneOrigin.Node->Block->getFirstTerminator(), {},
-                TII.get(LMU.consts().OpCSelect), CondReg)
-            .addReg(LMU.consts().RegExec)
+                TII.get(LMU.consts().CSelectOpc), CondReg)
+            .addReg(LMU.consts().ExecReg)
             .addImm(0);
       } else {
         BuildMI(*LaneOrigin.Node->Block,
                 LaneOrigin.Node->Block->getFirstTerminator(), {},
-                TII.get(LMU.consts().OpCSelect), CondReg)
+                TII.get(LMU.consts().CSelectOpc), CondReg)
             .addImm(0)
-            .addReg(LMU.consts().RegExec);
+            .addReg(LMU.consts().ExecReg);
       }
     } else {
       CondReg = LaneOrigin.CondReg;
@@ -1941,8 +1941,8 @@ void ControlFlowRewriter::rewrite() {
       CondReg = LMU.createLaneMaskReg();
       BuildMI(*LaneOrigin.Node->Block,
               LaneOrigin.Node->Block->getFirstTerminator(), {},
-              TII.get(LMU.consts().OpAnd), CondReg)
-          .addReg(LMU.consts().RegExec)
+              TII.get(LMU.consts().AndOpc), CondReg)
+          .addReg(LMU.consts().ExecReg)
           .addReg(Prev);

       RegMap[std::make_pair(LaneOrigin.Node->Block, LaneOrigin.CondReg)]
@@ -1962,7 +1962,7 @@ void ControlFlowRewriter::rewrite() {
       CondReg = LMU.createLaneMaskReg();
       BuildMI(*LaneOrigin.Node->Block,
               LaneOrigin.Node->Block->getFirstTerminator(), {},
-              TII.get(LMU.consts().OpXor), CondReg)
+              TII.get(LMU.consts().XorOpc), CondReg)
           .addReg(LaneOrigin.CondReg)
           .addImm(-1);

@@ -1999,7 +1999,7 @@ void ControlFlowRewriter::rewrite() {
                       << '\n');

     BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
-            TII.get(LMU.consts().OpMovTerm), LMU.consts().RegExec)
+            TII.get(LMU.consts().MovTermOpc), LMU.consts().ExecReg)
         .addReg(OriginCFGNodeInfo.PrimarySuccessorExec);
     BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
             TII.get(AMDGPU::SI_WAVE_CF_EDGE));
@@ -2046,12 +2046,12 @@ void ControlFlowRewriter::rewrite() {
     Register Rejoin;

     if (PrimaryExecDef->getParent() == Pred->Block &&
-        PrimaryExecDef->getOpcode() == LMU.consts().OpXor &&
+        PrimaryExecDef->getOpcode() == LMU.consts().XorOpc &&
         PrimaryExecDef->getOperand(1).isReg() &&
         PrimaryExecDef->getOperand(2).isReg()) {
-      if (PrimaryExecDef->getOperand(1).getReg() == LMU.consts().RegExec)
+      if (PrimaryExecDef->getOperand(1).getReg() == LMU.consts().ExecReg)
         Rejoin = PrimaryExecDef->getOperand(2).getReg();
-      else if (PrimaryExecDef->getOperand(2).getReg() == LMU.consts().RegExec)
+      else if (PrimaryExecDef->getOperand(2).getReg() == LMU.consts().ExecReg)
         Rejoin = PrimaryExecDef->getOperand(1).getReg();
     }

@@ -2069,8 +2069,8 @@ void ControlFlowRewriter::rewrite() {
     if (!Rejoin) {
       Rejoin = LMU.createLaneMaskReg();
       BuildMI(*Pred->Block, Pred->Block->getFirstTerminator(), {},
-              TII.get(LMU.consts().OpXor), Rejoin)
-          .addReg(LMU.consts().RegExec)
+              TII.get(LMU.consts().XorOpc), Rejoin)
+          .addReg(LMU.consts().ExecReg)
           .addReg(PrimaryExec);
     }

@@ -2084,8 +2084,8 @@ void ControlFlowRewriter::rewrite() {

     Register Rejoin = Updater.getValueInMiddleOfBlock(*Secondary->Block);
     BuildMI(*Secondary->Block, Secondary->Block->getFirstNonPHI(), {},
-            TII.get(LMU.consts().OpOr), LMU.consts().RegExec)
-        .addReg(LMU.consts().RegExec)
+            TII.get(LMU.consts().OrOpc), LMU.consts().ExecReg)
+        .addReg(LMU.consts().ExecReg)
         .addReg(Rejoin);

     LLVM_DEBUG(Function.dump());

llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp

Lines changed: 24 additions & 51 deletions
@@ -16,38 +16,11 @@

 using namespace llvm;

-/// Obtain a reference to the global wavefront-size dependent constants
-/// based on \p wavefrontSize.
-const GCNLaneMaskConstants *
-GCNLaneMaskUtils::getConsts(unsigned WavefrontSize) {
-  static const GCNLaneMaskConstants Wave32 = {
-      AMDGPU::EXEC_LO, AMDGPU::VCC_LO, &AMDGPU::SReg_32RegClass,
-      AMDGPU::S_MOV_B32, AMDGPU::S_MOV_B32_term, AMDGPU::S_AND_B32,
-      AMDGPU::S_OR_B32, AMDGPU::S_XOR_B32, AMDGPU::S_ANDN2_B32,
-      AMDGPU::S_ORN2_B32, AMDGPU::S_CSELECT_B32,
-  };
-  static const GCNLaneMaskConstants Wave64 = {
-      AMDGPU::EXEC,
-      AMDGPU::VCC,
-      &AMDGPU::SReg_64RegClass,
-      AMDGPU::S_MOV_B64,
-      AMDGPU::S_MOV_B64_term,
-      AMDGPU::S_AND_B64,
-      AMDGPU::S_OR_B64,
-      AMDGPU::S_XOR_B64,
-      AMDGPU::S_ANDN2_B64,
-      AMDGPU::S_ORN2_B64,
-      AMDGPU::S_CSELECT_B64,
-  };
-  assert(WavefrontSize == 32 || WavefrontSize == 64);
-  return WavefrontSize == 32 ? &Wave32 : &Wave64;
-}

 /// Obtain a reference to the global wavefront-size dependent constants
 /// based on the wavefront-size of \p function.
-const GCNLaneMaskConstants *GCNLaneMaskUtils::getConsts(MachineFunction &MF) {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  return getConsts(ST.getWavefrontSize());
+const AMDGPU::LaneMaskConstants &GCNLaneMaskUtils::getConsts(MachineFunction &MF) {
+  return AMDGPU::LaneMaskConstants::get(MF.getSubtarget<GCNSubtarget>());
 }

 /// Check whether the register could be a lane-mask register.
@@ -90,7 +63,7 @@ bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
       return false;
     }

-    if (MI->getOpcode() != Constants->OpMov)
+    if (MI->getOpcode() != Constants->MovOpc)
       return false;

     if (!MI->getOperand(1).isImm())
@@ -112,7 +85,7 @@ bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
 /// Create a virtual lanemask register.
 Register GCNLaneMaskUtils::createLaneMaskReg() const {
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  return MRI.createVirtualRegister(Constants->RegClass);
+  return MRI.createVirtualRegister(Constants->SRegClass);
 }

 /// Insert the moral equivalent of
@@ -155,10 +128,10 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
   } else if (CurVal) {
     // If PrevReg is undef, prefer to propagate a full constant.
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
-        .addReg(PrevReg ? Constants->RegExec : CurReg);
+        .addReg(PrevReg ? Constants->ExecReg : CurReg);
   } else {
-    BuildMI(MBB, I, DL, TII->get(Constants->OpXor), DstReg)
-        .addReg(Constants->RegExec)
+    BuildMI(MBB, I, DL, TII->get(Constants->XorOpc), DstReg)
+        .addReg(Constants->ExecReg)
         .addImm(-1);
   }
   return;
@@ -174,9 +147,9 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
     } else {
       PrevMaskedReg = createLaneMaskReg();
       PrevMaskedBuilt =
-          BuildMI(MBB, I, DL, TII->get(Constants->OpAndN2), PrevMaskedReg)
+          BuildMI(MBB, I, DL, TII->get(Constants->AndN2Opc), PrevMaskedReg)
               .addReg(PrevReg)
-              .addReg(Constants->RegExec);
+              .addReg(Constants->ExecReg);
     }
   }
   if (!CurConstant) {
@@ -186,9 +159,9 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
     } else {
       CurMaskedReg = createLaneMaskReg();
       CurMaskedBuilt =
-          BuildMI(MBB, I, DL, TII->get(Constants->OpAnd), CurMaskedReg)
+          BuildMI(MBB, I, DL, TII->get(Constants->AndOpc), CurMaskedReg)
              .addReg(CurReg)
-              .addReg(Constants->RegExec);
+              .addReg(Constants->ExecReg);
     }
   }

@@ -208,13 +181,13 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
       BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(PrevMaskedReg);
     }
   } else if (PrevConstant && PrevVal) {
-    BuildMI(MBB, I, DL, TII->get(Constants->OpOrN2), DstReg)
+    BuildMI(MBB, I, DL, TII->get(Constants->OrN2Opc), DstReg)
         .addReg(CurMaskedReg)
-        .addReg(Constants->RegExec);
+        .addReg(Constants->ExecReg);
   } else {
-    BuildMI(MBB, I, DL, TII->get(Constants->OpOr), DstReg)
+    BuildMI(MBB, I, DL, TII->get(Constants->OrOpc), DstReg)
         .addReg(PrevMaskedReg)
-        .addReg(CurMaskedReg ? CurMaskedReg : Constants->RegExec);
+        .addReg(CurMaskedReg ? CurMaskedReg : Constants->ExecReg);
   }
 }

@@ -229,7 +202,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,

   for (;;) {
     if (!Register::isVirtualRegister(Reg)) {
-      if (Reg == LMU.consts().RegExec &&
+      if (Reg == LMU.consts().ExecReg &&
           (!DefInstr || DefInstr->getParent() == &UseBlock))
         return true;
       return false;
@@ -241,7 +214,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
       continue;
     }

-    if (DefInstr->getOpcode() == LMU.consts().OpMov) {
+    if (DefInstr->getOpcode() == LMU.consts().MovOpc) {
       if (DefInstr->getOperand(1).isImm() &&
           DefInstr->getOperand(1).getImm() == 0)
         return true;
@@ -268,11 +241,11 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
     if (!RemainingDepth--)
       return false;

-    bool LikeOr = DefInstr->getOpcode() == LMU.consts().OpOr ||
-                  DefInstr->getOpcode() == LMU.consts().OpXor ||
-                  DefInstr->getOpcode() == LMU.consts().OpCSelect;
-    bool IsAnd = DefInstr->getOpcode() == LMU.consts().OpAnd;
-    bool IsAndN2 = DefInstr->getOpcode() == LMU.consts().OpAndN2;
+    bool LikeOr = DefInstr->getOpcode() == LMU.consts().OrOpc ||
+                  DefInstr->getOpcode() == LMU.consts().XorOpc ||
+                  DefInstr->getOpcode() == LMU.consts().CSelectOpc;
+    bool IsAnd = DefInstr->getOpcode() == LMU.consts().AndOpc;
+    bool IsAndN2 = DefInstr->getOpcode() == LMU.consts().AndN2Opc;
     if ((LikeOr || IsAnd || IsAndN2) &&
         (DefInstr->getOperand(1).isReg() && DefInstr->getOperand(2).isReg())) {
       bool FirstIsSubset = isSubsetOfExec(DefInstr->getOperand(1).getReg(),
@@ -301,7 +274,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
 void GCNLaneMaskUpdater::init(Register Reg) {
   Processed = false;
   Blocks.clear();
-  //SSAUpdater.Initialize(LMU.consts().RegClass);
+  //SSAUpdater.Initialize(LMU.consts().SRegClass);
   SSAUpdater.Initialize(Reg);
 }

@@ -451,7 +424,7 @@ void GCNLaneMaskUpdater::process() {
   // Prepare an all-zero value for the default and reset in accumulating mode.
   if (Accumulating && !ZeroReg) {
     ZeroReg = LMU.createLaneMaskReg();
-    BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.consts().OpMov),
+    BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.consts().MovOpc),
             ZeroReg)
         .addImm(0);
   }

llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.h

Lines changed: 6 additions & 20 deletions
@@ -15,6 +15,8 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNLANEMASKUTILS_H
 #define LLVM_LIB_TARGET_AMDGPU_GCNLANEMASKUTILS_H

+#include "AMDGPULaneMaskUtils.h"
+
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -25,41 +27,25 @@ namespace llvm {
 class GCNLaneMaskAnalysis;
 class MachineFunction;

-/// \brief Wavefront-size dependent constants.
-struct GCNLaneMaskConstants {
-  Register RegExec;                    // EXEC / EXEC_LO
-  Register RegVcc;                     // VCC / VCC_LO
-  const TargetRegisterClass *RegClass; // SReg_nnRegClass
-  unsigned OpMov;                      // S_MOV_Bnn
-  unsigned OpMovTerm;                  // S_MOV_Bnn_term
-  unsigned OpAnd;                      // S_AND_Bnn
-  unsigned OpOr;                       // S_OR_Bnn
-  unsigned OpXor;                      // S_XOR_Bnn
-  unsigned OpAndN2;                    // S_ANDN2_Bnn
-  unsigned OpOrN2;                     // S_ORN2_Bnn
-  unsigned OpCSelect;                  // S_CSELECT_Bnn
-};
-
 /// \brief Helper class for lane-mask related tasks.
 class GCNLaneMaskUtils {
 private:
   MachineFunction *MF = nullptr;
-  const GCNLaneMaskConstants *Constants = nullptr;
+  const AMDGPU::LaneMaskConstants *Constants = nullptr;

 public:
-  static const GCNLaneMaskConstants *getConsts(unsigned WavefrontSize);
-  static const GCNLaneMaskConstants *getConsts(MachineFunction &MF);
+  static const AMDGPU::LaneMaskConstants &getConsts(MachineFunction &MF);

   GCNLaneMaskUtils() = default;
   explicit GCNLaneMaskUtils(MachineFunction &MF) { setFunction(MF); }

   MachineFunction *function() const { return MF; }
   void setFunction(MachineFunction &Func) {
     MF = &Func;
-    Constants = getConsts(Func);
+    Constants = &getConsts(Func);
   }

-  const GCNLaneMaskConstants &consts() const {
+  const AMDGPU::LaneMaskConstants &consts() const {
     assert(Constants);
     return *Constants;
   }
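
As a consumer-side sketch (illustrative only, not part of the patch), passes keep using the GCNLaneMaskUtils wrapper exactly as before; only the type behind consts() and the member names change:

    GCNLaneMaskUtils LMU(MF);               // setFunction caches &getConsts(MF)
    unsigned XorOpc = LMU.consts().XorOpc;  // was LMU.consts().OpXor before this patch
    Register Exec = LMU.consts().ExecReg;   // was LMU.consts().RegExec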
