Commit 88c668d

[AMDGPU][SIInsertWaitCnts] De-duplicate code (NFC) (#161161)
I've been reading through the pass repeatedly to learn how it works, and noticed some code duplication here and there along the way.
1 parent 14fcd81 · commit 88c668d
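For readers skimming the diff, the core pattern in this change is hoisting the repeated MI.getOpcode() calls in SIInsertWaitcnts::generateWaitcntInstBefore into a single local Opc and comparing against that. A minimal sketch of the before/after shape, assuming LLVM's MachineInstr API; the isReturnLikeBefore/isReturnLikeAfter wrappers are illustrative only and not part of the patch:

// Sketch only: standalone wrappers for illustration; the real change applies
// the same rewrite inline in generateWaitcntInstBefore. The AMDGPU::SI_RETURN*
// opcodes come from the AMDGPU target's generated opcode enum
// (target-internal headers), not from a public LLVM include.
#include "llvm/CodeGen/MachineInstr.h"

// Before: every comparison re-invokes the accessor.
static bool isReturnLikeBefore(const llvm::MachineInstr &MI) {
  return MI.getOpcode() == llvm::AMDGPU::SI_RETURN_TO_EPILOG ||
         MI.getOpcode() == llvm::AMDGPU::SI_RETURN;
}

// After: cache the opcode once; the conditions get shorter and pack more
// comparisons per line, which accounts for part of the -35/+29 delta below.
static bool isReturnLikeAfter(const llvm::MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();
  return Opc == llvm::AMDGPU::SI_RETURN_TO_EPILOG ||
         Opc == llvm::AMDGPU::SI_RETURN;
}

Beyond the opcode caching, the hunks below also fold an else { if } into an else if, group the XCnt checks in generateWaitcnt under one Wait.XCnt != ~0u guard, and turn the mayLoad()/mayStore() flag updates into |= assignments; all of these are behavior-preserving (NFC).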

2 files changed: +34, −35 lines


llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 29 additions & 35 deletions
@@ -1853,26 +1853,24 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   assert(!MI.isMetaInstruction());
 
   AMDGPU::Waitcnt Wait;
+  const unsigned Opc = MI.getOpcode();
 
   // FIXME: This should have already been handled by the memory legalizer.
   // Removing this currently doesn't affect any lit tests, but we need to
   // verify that nothing was relying on this. The number of buffer invalidates
   // being handled here should not be expanded.
-  if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
-      MI.getOpcode() == AMDGPU::BUFFER_GL0_INV ||
-      MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) {
+  if (Opc == AMDGPU::BUFFER_WBINVL1 || Opc == AMDGPU::BUFFER_WBINVL1_SC ||
+      Opc == AMDGPU::BUFFER_WBINVL1_VOL || Opc == AMDGPU::BUFFER_GL0_INV ||
+      Opc == AMDGPU::BUFFER_GL1_INV) {
     Wait.LoadCnt = 0;
   }
 
   // All waits must be resolved at call return.
   // NOTE: this could be improved with knowledge of all call sites or
   // with knowledge of the called routines.
-  if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
-      MI.getOpcode() == AMDGPU::SI_RETURN ||
-      MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
-      MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+  if (Opc == AMDGPU::SI_RETURN_TO_EPILOG || Opc == AMDGPU::SI_RETURN ||
+      Opc == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
+      Opc == AMDGPU::S_SETPC_B64_return ||
       (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
     Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
   }
@@ -1884,8 +1882,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // send a message to explicitly release all VGPRs before the stores have
   // completed, but it is only safe to do this if there are no outstanding
   // scratch stores.
-  else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-           MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+  else if (Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED) {
     if (!WCG->isOptNone() &&
         (MI.getMF()->getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() ||
          (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
@@ -1894,8 +1891,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       ReleaseVGPRInsts.insert(&MI);
   }
   // Resolve vm waits before gs-done.
-  else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
-            MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+  else if ((Opc == AMDGPU::S_SENDMSG || Opc == AMDGPU::S_SENDMSGHALT) &&
            ST->hasLegacyGeometry() &&
            ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) ==
             AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) {
@@ -1920,7 +1916,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
 
   // Wait for any pending GDS instruction to complete before any
   // "Always GDS" instruction.
-  if (TII->isAlwaysGDS(MI.getOpcode()) && ScoreBrackets.hasPendingGDS())
+  if (TII->isAlwaysGDS(Opc) && ScoreBrackets.hasPendingGDS())
     addWait(Wait, DS_CNT, ScoreBrackets.getPendingGDSWait());
 
   if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
@@ -1946,7 +1942,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
                 Wait);
       }
     }
-  } else if (MI.getOpcode() == AMDGPU::S_BARRIER_WAIT) {
+  } else if (Opc == AMDGPU::S_BARRIER_WAIT) {
     ScoreBrackets.tryClearSCCWriteEvent(&MI);
   } else {
     // FIXME: Should not be relying on memoperands.
@@ -2061,8 +2057,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   //
   // In all other cases, ensure safety by ensuring that there are no outstanding
   // memory operations.
-  if (MI.getOpcode() == AMDGPU::S_BARRIER &&
-      !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
+  if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() &&
+      !ST->supportsBackOffBarrier()) {
     Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
   }
 
@@ -2146,19 +2142,19 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
   }
 
   // XCnt may be already consumed by a load wait.
-  if (Wait.KmCnt == 0 && Wait.XCnt != ~0u &&
-      !ScoreBrackets.hasPendingEvent(SMEM_GROUP))
-    Wait.XCnt = ~0u;
+  if (Wait.XCnt != ~0u) {
+    if (Wait.KmCnt == 0 && !ScoreBrackets.hasPendingEvent(SMEM_GROUP))
+      Wait.XCnt = ~0u;
 
-  if (Wait.LoadCnt == 0 && Wait.XCnt != ~0u &&
-      !ScoreBrackets.hasPendingEvent(VMEM_GROUP))
-    Wait.XCnt = ~0u;
+    if (Wait.LoadCnt == 0 && !ScoreBrackets.hasPendingEvent(VMEM_GROUP))
+      Wait.XCnt = ~0u;
 
-  // Since the translation for VMEM addresses occur in-order, we can skip the
-  // XCnt if the current instruction is of VMEM type and has a memory dependency
-  // with another VMEM instruction in flight.
-  if (Wait.XCnt != ~0u && isVmemAccess(*It))
-    Wait.XCnt = ~0u;
+    // Since the translation for VMEM addresses occur in-order, we can skip the
+    // XCnt if the current instruction is of VMEM type and has a memory
+    // dependency with another VMEM instruction in flight.
+    if (isVmemAccess(*It))
+      Wait.XCnt = ~0u;
+  }
 
   if (WCG->createNewWaitcnt(Block, It, Wait))
     Modified = true;
@@ -2395,9 +2391,8 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
         unsigned OldEventsHasSCCWrite = OldEvents & (1 << SCC_WRITE);
         if (!OldEventsHasSCCWrite) {
           PendingSCCWrite = Other.PendingSCCWrite;
-        } else {
-          if (PendingSCCWrite != Other.PendingSCCWrite)
-            PendingSCCWrite = nullptr;
+        } else if (PendingSCCWrite != Other.PendingSCCWrite) {
+          PendingSCCWrite = nullptr;
         }
       }
     }
@@ -2635,11 +2630,10 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   for (MachineBasicBlock *MBB : ML->blocks()) {
     for (MachineInstr &MI : *MBB) {
       if (isVMEMOrFlatVMEM(MI)) {
-        if (MI.mayLoad())
-          HasVMemLoad = true;
-        if (MI.mayStore())
-          HasVMemStore = true;
+        HasVMemLoad |= MI.mayLoad();
+        HasVMemStore |= MI.mayStore();
       }
+
       for (const MachineOperand &Op : MI.all_uses()) {
         if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
           continue;
llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 5 additions & 0 deletions
@@ -1033,6 +1033,11 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opc == AMDGPU::GLOBAL_WBINV;
   }
 
+  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
+    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
+           Opc == AMDGPU::GLOBAL_WBINV;
+  }
+
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
            Opcode == AMDGPU::V_S_LOG_F16_e64 ||
