Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ class LaneMaskConstants {
const unsigned MovTermOpc;
const unsigned OrOpc;
const unsigned OrTermOpc;
const unsigned OrN2Opc;
const unsigned OrSaveExecOpc;
const unsigned XorOpc;
const unsigned XorTermOpc;
const unsigned WQMOpc;
const TargetRegisterClass *LaneMaskRC;

constexpr LaneMaskConstants(bool IsWave32)
: ExecReg(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC),
Expand All @@ -62,11 +64,13 @@ class LaneMaskConstants {
MovTermOpc(IsWave32 ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term),
OrOpc(IsWave32 ? AMDGPU::S_OR_B32 : AMDGPU::S_OR_B64),
OrTermOpc(IsWave32 ? AMDGPU::S_OR_B32_term : AMDGPU::S_OR_B64_term),
OrN2Opc(IsWave32 ? AMDGPU::S_ORN2_B32 : AMDGPU::S_ORN2_B64),
OrSaveExecOpc(IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32
: AMDGPU::S_OR_SAVEEXEC_B64),
XorOpc(IsWave32 ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64),
XorTermOpc(IsWave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term),
WQMOpc(IsWave32 ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64) {}
WQMOpc(IsWave32 ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
LaneMaskRC(IsWave32 ? &AMDGPU::SReg_32RegClass : &AMDGPU::SReg_64RegClass) {}

static inline const LaneMaskConstants &get(const GCNSubtarget &ST);
};
Expand Down
41 changes: 20 additions & 21 deletions llvm/lib/Target/AMDGPU/AMDGPUWaveTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1785,13 +1785,14 @@ void ControlFlowRewriter::prepareWaveCfg() {
/// manipulation.
void ControlFlowRewriter::rewrite() {
GCNLaneMaskAnalysis LMA(Function);
const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts();

Register RegAllOnes;
auto getAllOnes = [&]() {
if (!RegAllOnes) {
RegAllOnes = LMU.createLaneMaskReg();
BuildMI(Function.front(), Function.front().getFirstTerminator(), {},
TII.get(LMU.consts().OpMov), RegAllOnes)
TII.get(LMC.MovOpc), RegAllOnes)
.addImm(-1);
}
return RegAllOnes;
Expand Down Expand Up @@ -1841,12 +1842,12 @@ void ControlFlowRewriter::rewrite() {
if (!LMA.isSubsetOfExec(CondReg, *Node->Block)) {
CondReg = LMU.createLaneMaskReg();
BuildMI(*Node->Block, Node->Block->end(), {},
TII.get(LMU.consts().OpAnd), CondReg)
.addReg(LMU.consts().RegExec)
TII.get(LMC.AndOpc), CondReg)
.addReg(LMC.ExecReg)
.addReg(Info.OrigCondition);
}
BuildMI(*Node->Block, Node->Block->end(), {}, TII.get(AMDGPU::COPY),
LMU.consts().RegVcc)
LMC.VccReg)
.addReg(CondReg);

Opcode = AMDGPU::S_CBRANCH_VCCNZ;
Expand Down Expand Up @@ -1924,15 +1925,15 @@ void ControlFlowRewriter::rewrite() {
if (!LaneOrigin.InvertCondition) {
BuildMI(*LaneOrigin.Node->Block,
LaneOrigin.Node->Block->getFirstTerminator(), {},
TII.get(LMU.consts().OpCSelect), CondReg)
.addReg(LMU.consts().RegExec)
TII.get(LMC.CSelectOpc), CondReg)
.addReg(LMC.ExecReg)
.addImm(0);
} else {
BuildMI(*LaneOrigin.Node->Block,
LaneOrigin.Node->Block->getFirstTerminator(), {},
TII.get(LMU.consts().OpCSelect), CondReg)
TII.get(LMC.CSelectOpc), CondReg)
.addImm(0)
.addReg(LMU.consts().RegExec);
.addReg(LMC.ExecReg);
}
} else {
CondReg = LaneOrigin.CondReg;
Expand All @@ -1941,8 +1942,8 @@ void ControlFlowRewriter::rewrite() {
CondReg = LMU.createLaneMaskReg();
BuildMI(*LaneOrigin.Node->Block,
LaneOrigin.Node->Block->getFirstTerminator(), {},
TII.get(LMU.consts().OpAnd), CondReg)
.addReg(LMU.consts().RegExec)
TII.get(LMC.AndOpc), CondReg)
.addReg(LMC.ExecReg)
.addReg(Prev);

RegMap[std::make_pair(LaneOrigin.Node->Block, LaneOrigin.CondReg)]
Expand All @@ -1962,7 +1963,7 @@ void ControlFlowRewriter::rewrite() {
CondReg = LMU.createLaneMaskReg();
BuildMI(*LaneOrigin.Node->Block,
LaneOrigin.Node->Block->getFirstTerminator(), {},
TII.get(LMU.consts().OpXor), CondReg)
TII.get(LMC.XorOpc), CondReg)
.addReg(LaneOrigin.CondReg)
.addImm(-1);

Expand Down Expand Up @@ -1999,7 +2000,7 @@ void ControlFlowRewriter::rewrite() {
<< '\n');

BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
TII.get(LMU.consts().OpMovTerm), LMU.consts().RegExec)
TII.get(LMC.MovTermOpc), LMC.ExecReg)
.addReg(OriginCFGNodeInfo.PrimarySuccessorExec);
BuildMI(*OriginNode->Block, OriginNode->Block->end(), {},
TII.get(AMDGPU::SI_WAVE_CF_EDGE));
Expand Down Expand Up @@ -2046,12 +2047,12 @@ void ControlFlowRewriter::rewrite() {
Register Rejoin;

if (PrimaryExecDef->getParent() == Pred->Block &&
PrimaryExecDef->getOpcode() == LMU.consts().OpXor &&
PrimaryExecDef->getOpcode() == LMC.XorOpc &&
PrimaryExecDef->getOperand(1).isReg() &&
PrimaryExecDef->getOperand(2).isReg()) {
if (PrimaryExecDef->getOperand(1).getReg() == LMU.consts().RegExec)
if (PrimaryExecDef->getOperand(1).getReg() == LMC.ExecReg)
Rejoin = PrimaryExecDef->getOperand(2).getReg();
else if (PrimaryExecDef->getOperand(2).getReg() == LMU.consts().RegExec)
else if (PrimaryExecDef->getOperand(2).getReg() == LMC.ExecReg)
Rejoin = PrimaryExecDef->getOperand(1).getReg();
}

Expand All @@ -2069,8 +2070,8 @@ void ControlFlowRewriter::rewrite() {
if (!Rejoin) {
Rejoin = LMU.createLaneMaskReg();
BuildMI(*Pred->Block, Pred->Block->getFirstTerminator(), {},
TII.get(LMU.consts().OpXor), Rejoin)
.addReg(LMU.consts().RegExec)
TII.get(LMC.XorOpc), Rejoin)
.addReg(LMC.ExecReg)
.addReg(PrimaryExec);
}

Expand All @@ -2084,8 +2085,8 @@ void ControlFlowRewriter::rewrite() {

Register Rejoin = Updater.getValueInMiddleOfBlock(*Secondary->Block);
BuildMI(*Secondary->Block, Secondary->Block->getFirstNonPHI(), {},
TII.get(LMU.consts().OpOr), LMU.consts().RegExec)
.addReg(LMU.consts().RegExec)
TII.get(LMC.OrOpc), LMC.ExecReg)
.addReg(LMC.ExecReg)
.addReg(Rejoin);

LLVM_DEBUG(Function.dump());
Expand Down Expand Up @@ -2136,7 +2137,6 @@ class AMDGPUWaveTransform : public MachineFunctionPass {
MachineDominatorTree *DomTree = nullptr;
// MachineConvergenceInfo ConvergenceInfo;
MachineCycleInfo *CycleInfo;
GCNLaneMaskUtils LMU;
const SIInstrInfo *TII;
};

Expand All @@ -2163,7 +2163,6 @@ bool AMDGPUWaveTransform::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "AMDGPU Wave Transformnsform: " << MF.getName() << '\n');

DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
LMU.setFunction(MF);
TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

// ConvergenceInfo = computeMachineConvergenceInfo(MF, *DomTree);
Expand Down
89 changes: 28 additions & 61 deletions llvm/lib/Target/AMDGPU/GCNLaneMaskUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,47 +16,13 @@

using namespace llvm;

/// Obtain a reference to the global wavefront-size dependent constants
/// based on \p wavefrontSize.
///
/// Returns a pointer to one of two function-local static tables, so the
/// result is valid for the lifetime of the program and never null.
/// NOTE(review): the brace-initializer entries must match the field
/// declaration order of GCNLaneMaskConstants (exec reg, vcc reg, register
/// class, mov, mov_term, and, or, xor, andn2, orn2, cselect) — verify
/// against the struct definition if fields are added or reordered.
const GCNLaneMaskConstants *
GCNLaneMaskUtils::getConsts(unsigned WavefrontSize) {
// Wave32: 32-bit scalar opcodes and the EXEC_LO/VCC_LO halves.
static const GCNLaneMaskConstants Wave32 = {
AMDGPU::EXEC_LO, AMDGPU::VCC_LO, &AMDGPU::SReg_32RegClass,
AMDGPU::S_MOV_B32, AMDGPU::S_MOV_B32_term, AMDGPU::S_AND_B32,
AMDGPU::S_OR_B32, AMDGPU::S_XOR_B32, AMDGPU::S_ANDN2_B32,
AMDGPU::S_ORN2_B32, AMDGPU::S_CSELECT_B32,
};
// Wave64: 64-bit scalar opcodes and the full EXEC/VCC registers.
static const GCNLaneMaskConstants Wave64 = {
AMDGPU::EXEC,
AMDGPU::VCC,
&AMDGPU::SReg_64RegClass,
AMDGPU::S_MOV_B64,
AMDGPU::S_MOV_B64_term,
AMDGPU::S_AND_B64,
AMDGPU::S_OR_B64,
AMDGPU::S_XOR_B64,
AMDGPU::S_ANDN2_B64,
AMDGPU::S_ORN2_B64,
AMDGPU::S_CSELECT_B64,
};
// Only the two hardware wave sizes are meaningful.
assert(WavefrontSize == 32 || WavefrontSize == 64);
return WavefrontSize == 32 ? &Wave32 : &Wave64;
}

/// Obtain a reference to the global wavefront-size dependent constants
/// for the wavefront size that \p MF is being compiled for.
const GCNLaneMaskConstants *GCNLaneMaskUtils::getConsts(MachineFunction &MF) {
return getConsts(MF.getSubtarget<GCNSubtarget>().getWavefrontSize());
}

/// Check whether the register could be a lane-mask register.
///
/// It does not distinguish between lane-masks and scalar registers that happen
/// to have the right bitsize.
bool GCNLaneMaskUtils::maybeLaneMask(Register Reg) const {
MachineRegisterInfo &MRI = MF->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
return TII->getRegisterInfo().isSGPRReg(MRI, Reg) &&
TII->getRegisterInfo().getRegSizeInBits(Reg, MRI) ==
Expand All @@ -66,7 +32,7 @@ bool GCNLaneMaskUtils::maybeLaneMask(Register Reg) const {
/// Determine whether the lane-mask register \p Reg is a wave-wide constant.
/// If so, the value is stored in \p Val.
bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();

const MachineInstr *MI;
for (;;) {
Expand All @@ -90,7 +56,7 @@ bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {
return false;
}

if (MI->getOpcode() != Constants->OpMov)
if (MI->getOpcode() != LMC.MovOpc)
return false;

if (!MI->getOperand(1).isImm())
Expand All @@ -111,8 +77,8 @@ bool GCNLaneMaskUtils::isConstantLaneMask(Register Reg, bool &Val) const {

/// Create a virtual lanemask register.
///
/// The register is drawn from the wave-size dependent lane-mask register
/// class (LMC.LaneMaskRC), so it is a single SGPR on wave32 and an SGPR
/// pair on wave64.
Register GCNLaneMaskUtils::createLaneMaskReg() const {
// The diff residue left both the pre-refactor (MF->/Constants->) and
// post-refactor statements in the body, redeclaring MRI and making the
// second return unreachable; keep only the current LaneMaskConstants form.
MachineRegisterInfo &MRI = MF.getRegInfo();
return MRI.createVirtualRegister(LMC.LaneMaskRC);
}

/// Insert the moral equivalent of
Expand Down Expand Up @@ -140,7 +106,7 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
Register PrevReg, Register CurReg,
GCNLaneMaskAnalysis *LMA,
bool accumulating) const {
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
bool PrevVal = false;
bool PrevConstant = !PrevReg || isConstantLaneMask(PrevReg, PrevVal);
Expand All @@ -155,10 +121,10 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
} else if (CurVal) {
// If PrevReg is undef, prefer to propagate a full constant.
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
.addReg(PrevReg ? Constants->RegExec : CurReg);
.addReg(PrevReg ? LMC.ExecReg : CurReg);
} else {
BuildMI(MBB, I, DL, TII->get(Constants->OpXor), DstReg)
.addReg(Constants->RegExec)
BuildMI(MBB, I, DL, TII->get(LMC.XorOpc), DstReg)
.addReg(LMC.ExecReg)
.addImm(-1);
}
return;
Expand All @@ -174,9 +140,9 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
} else {
PrevMaskedReg = createLaneMaskReg();
PrevMaskedBuilt =
BuildMI(MBB, I, DL, TII->get(Constants->OpAndN2), PrevMaskedReg)
BuildMI(MBB, I, DL, TII->get(LMC.AndN2Opc), PrevMaskedReg)
.addReg(PrevReg)
.addReg(Constants->RegExec);
.addReg(LMC.ExecReg);
}
}
if (!CurConstant) {
Expand All @@ -186,9 +152,9 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
} else {
CurMaskedReg = createLaneMaskReg();
CurMaskedBuilt =
BuildMI(MBB, I, DL, TII->get(Constants->OpAnd), CurMaskedReg)
BuildMI(MBB, I, DL, TII->get(LMC.AndOpc), CurMaskedReg)
.addReg(CurReg)
.addReg(Constants->RegExec);
.addReg(LMC.ExecReg);
}
}

Expand All @@ -208,13 +174,13 @@ void GCNLaneMaskUtils::buildMergeLaneMasks(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(PrevMaskedReg);
}
} else if (PrevConstant && PrevVal) {
BuildMI(MBB, I, DL, TII->get(Constants->OpOrN2), DstReg)
BuildMI(MBB, I, DL, TII->get(LMC.OrN2Opc), DstReg)
.addReg(CurMaskedReg)
.addReg(Constants->RegExec);
.addReg(LMC.ExecReg);
} else {
BuildMI(MBB, I, DL, TII->get(Constants->OpOr), DstReg)
BuildMI(MBB, I, DL, TII->get(LMC.OrOpc), DstReg)
.addReg(PrevMaskedReg)
.addReg(CurMaskedReg ? CurMaskedReg : Constants->RegExec);
.addReg(CurMaskedReg ? CurMaskedReg : LMC.ExecReg);
}
}

Expand All @@ -226,10 +192,11 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
unsigned RemainingDepth) {
MachineRegisterInfo &MRI = LMU.function()->getRegInfo();
MachineInstr *DefInstr = nullptr;
const AMDGPU::LaneMaskConstants &LMC = LMU.getLaneMaskConsts();

for (;;) {
if (!Register::isVirtualRegister(Reg)) {
if (Reg == LMU.consts().RegExec &&
if (Reg == LMC.ExecReg &&
(!DefInstr || DefInstr->getParent() == &UseBlock))
return true;
return false;
Expand All @@ -241,7 +208,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
continue;
}

if (DefInstr->getOpcode() == LMU.consts().OpMov) {
if (DefInstr->getOpcode() == LMC.MovOpc) {
if (DefInstr->getOperand(1).isImm() &&
DefInstr->getOperand(1).getImm() == 0)
return true;
Expand All @@ -268,11 +235,11 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
if (!RemainingDepth--)
return false;

bool LikeOr = DefInstr->getOpcode() == LMU.consts().OpOr ||
DefInstr->getOpcode() == LMU.consts().OpXor ||
DefInstr->getOpcode() == LMU.consts().OpCSelect;
bool IsAnd = DefInstr->getOpcode() == LMU.consts().OpAnd;
bool IsAndN2 = DefInstr->getOpcode() == LMU.consts().OpAndN2;
bool LikeOr = DefInstr->getOpcode() == LMC.OrOpc ||
DefInstr->getOpcode() == LMC.XorOpc ||
DefInstr->getOpcode() == LMC.CSelectOpc;
bool IsAnd = DefInstr->getOpcode() == LMC.AndOpc;
bool IsAndN2 = DefInstr->getOpcode() == LMC.AndN2Opc;
if ((LikeOr || IsAnd || IsAndN2) &&
(DefInstr->getOperand(1).isReg() && DefInstr->getOperand(2).isReg())) {
bool FirstIsSubset = isSubsetOfExec(DefInstr->getOperand(1).getReg(),
Expand Down Expand Up @@ -301,7 +268,7 @@ bool GCNLaneMaskAnalysis::isSubsetOfExec(Register Reg,
/// Reset the updater state so it can track a new lane-mask value.
///
/// Clears the processed flag and the recorded block set, then (re)initializes
/// the underlying SSA updater from \p Reg.
void GCNLaneMaskUpdater::init(Register Reg) {
Processed = false;
Blocks.clear();
// Initialize from the concrete register rather than the lane-mask register
// class directly:
//SSAUpdater.Initialize(LMU.getLaneMaskConsts().LaneMaskRC);
SSAUpdater.Initialize(Reg);
}

Expand Down Expand Up @@ -451,7 +418,7 @@ void GCNLaneMaskUpdater::process() {
// Prepare an all-zero value for the default and reset in accumulating mode.
if (Accumulating && !ZeroReg) {
ZeroReg = LMU.createLaneMaskReg();
BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.consts().OpMov),
BuildMI(Entry, Entry.getFirstTerminator(), {}, TII->get(LMU.getLaneMaskConsts().MovOpc),
ZeroReg)
.addImm(0);
}
Expand Down
Loading