@@ -1853,26 +1853,24 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   assert(!MI.isMetaInstruction());

   AMDGPU::Waitcnt Wait;
+  const unsigned Opc = MI.getOpcode();

   // FIXME: This should have already been handled by the memory legalizer.
   // Removing this currently doesn't affect any lit tests, but we need to
   // verify that nothing was relying on this. The number of buffer invalidates
   // being handled here should not be expanded.
-  if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
-      MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
-      MI.getOpcode() == AMDGPU::BUFFER_GL0_INV ||
-      MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) {
+  if (Opc == AMDGPU::BUFFER_WBINVL1 || Opc == AMDGPU::BUFFER_WBINVL1_SC ||
+      Opc == AMDGPU::BUFFER_WBINVL1_VOL || Opc == AMDGPU::BUFFER_GL0_INV ||
+      Opc == AMDGPU::BUFFER_GL1_INV) {
     Wait.LoadCnt = 0;
   }

   // All waits must be resolved at call return.
   // NOTE: this could be improved with knowledge of all call sites or
   // with knowledge of the called routines.
-  if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
-      MI.getOpcode() == AMDGPU::SI_RETURN ||
-      MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
-      MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+  if (Opc == AMDGPU::SI_RETURN_TO_EPILOG || Opc == AMDGPU::SI_RETURN ||
+      Opc == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
+      Opc == AMDGPU::S_SETPC_B64_return ||
       (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
     Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false));
   }
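Aside from hoisting the repeated `MI.getOpcode()` calls into `Opc`, this hunk leans on the wait-count conventions used throughout the pass: a counter value of 0 demands a full drain before the instruction, `~0u` means no wait is needed, and `combined()` keeps the stricter requirement. A minimal sketch of those conventions, with a hypothetical stand-in type (not the real `llvm::AMDGPU::Waitcnt`):

```cpp
// Hypothetical sketch: 0 forces a drain of the counter, ~0u requests no
// wait, and combined() keeps the stricter (smaller) requirement per counter.
#include <algorithm>
#include <cstdint>

struct WaitcntSketch {
  uint32_t LoadCnt = ~0u; // ~0u: no wait required on this counter
  uint32_t DsCnt = ~0u;

  // Element-wise minimum: merging two requirements keeps the stricter one.
  WaitcntSketch combined(const WaitcntSketch &O) const {
    return {std::min(LoadCnt, O.LoadCnt), std::min(DsCnt, O.DsCnt)};
  }
};
```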
@@ -1884,8 +1882,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   // send a message to explicitly release all VGPRs before the stores have
   // completed, but it is only safe to do this if there are no outstanding
   // scratch stores.
-  else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
-           MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+  else if (Opc == AMDGPU::S_ENDPGM || Opc == AMDGPU::S_ENDPGM_SAVED) {
     if (!WCG->isOptNone() &&
         (MI.getMF()->getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() ||
          (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
@@ -1894,8 +1891,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
       ReleaseVGPRInsts.insert(&MI);
   }
   // Resolve vm waits before gs-done.
-  else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
-            MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+  else if ((Opc == AMDGPU::S_SENDMSG || Opc == AMDGPU::S_SENDMSGHALT) &&
            ST->hasLegacyGeometry() &&
            ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_PreGFX11_) ==
             AMDGPU::SendMsg::ID_GS_DONE_PreGFX11)) {
@@ -1920,7 +1916,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,

   // Wait for any pending GDS instruction to complete before any
   // "Always GDS" instruction.
-  if (TII->isAlwaysGDS(MI.getOpcode()) && ScoreBrackets.hasPendingGDS())
+  if (TII->isAlwaysGDS(Opc) && ScoreBrackets.hasPendingGDS())
     addWait(Wait, DS_CNT, ScoreBrackets.getPendingGDSWait());

   if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
@@ -1946,7 +1942,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
                 Wait);
       }
     }
-  } else if (MI.getOpcode() == AMDGPU::S_BARRIER_WAIT) {
+  } else if (Opc == AMDGPU::S_BARRIER_WAIT) {
     ScoreBrackets.tryClearSCCWriteEvent(&MI);
   } else {
     // FIXME: Should not be relying on memoperands.
@@ -2061,8 +2057,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   //
   // In all other cases, ensure safety by ensuring that there are no outstanding
   // memory operations.
-  if (MI.getOpcode() == AMDGPU::S_BARRIER &&
-      !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
+  if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() &&
+      !ST->supportsBackOffBarrier()) {
     Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
   }
@@ -2146,19 +2142,19 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
   }

   // XCnt may be already consumed by a load wait.
-  if (Wait.KmCnt == 0 && Wait.XCnt != ~0u &&
-      !ScoreBrackets.hasPendingEvent(SMEM_GROUP))
-    Wait.XCnt = ~0u;
+  if (Wait.XCnt != ~0u) {
+    if (Wait.KmCnt == 0 && !ScoreBrackets.hasPendingEvent(SMEM_GROUP))
+      Wait.XCnt = ~0u;

-  if (Wait.LoadCnt == 0 && Wait.XCnt != ~0u &&
-      !ScoreBrackets.hasPendingEvent(VMEM_GROUP))
-    Wait.XCnt = ~0u;
+    if (Wait.LoadCnt == 0 && !ScoreBrackets.hasPendingEvent(VMEM_GROUP))
+      Wait.XCnt = ~0u;

-  // Since the translation for VMEM addresses occur in-order, we can skip the
-  // XCnt if the current instruction is of VMEM type and has a memory dependency
-  // with another VMEM instruction in flight.
-  if (Wait.XCnt != ~0u && isVmemAccess(*It))
-    Wait.XCnt = ~0u;
+    // Since the translation for VMEM addresses occur in-order, we can skip the
+    // XCnt if the current instruction is of VMEM type and has a memory
+    // dependency with another VMEM instruction in flight.
+    if (isVmemAccess(*It))
+      Wait.XCnt = ~0u;
+  }

   if (WCG->createNewWaitcnt(Block, It, Wait))
     Modified = true;
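This hunk hoists the common `Wait.XCnt != ~0u` test into a single guard, so the three pruning rules run only when an XCnt wait is actually pending; the behavior is unchanged. A behavior-preserving sketch of the restructured logic, using a hypothetical free function rather than the pass's real interface:

```cpp
// Hypothetical sketch: prune a requested XCnt wait when another wait or an
// in-order dependency already provides the needed ordering. XCnt == ~0u
// means no wait is requested.
#include <cstdint>

void pruneXCnt(uint32_t &XCnt, uint32_t KmCnt, uint32_t LoadCnt,
               bool PendingSmemGroup, bool PendingVmemGroup,
               bool IsVmemAccess) {
  if (XCnt == ~0u) // no XCnt wait pending: nothing to prune
    return;
  // A kmcnt(0) wait already covers SMEM transfers when no SMEM group is
  // still in flight, so the XCnt wait adds nothing.
  if (KmCnt == 0 && !PendingSmemGroup)
    XCnt = ~0u;
  // Likewise, a loadcnt(0) wait covers VMEM when no VMEM group is pending.
  if (LoadCnt == 0 && !PendingVmemGroup)
    XCnt = ~0u;
  // VMEM address translation happens in-order, so a VMEM instruction that
  // already depends on an in-flight VMEM access needs no extra XCnt wait.
  if (IsVmemAccess)
    XCnt = ~0u;
}
```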
@@ -2395,9 +2391,8 @@ bool WaitcntBrackets::merge(const WaitcntBrackets &Other) {
       unsigned OldEventsHasSCCWrite = OldEvents & (1 << SCC_WRITE);
       if (!OldEventsHasSCCWrite) {
         PendingSCCWrite = Other.PendingSCCWrite;
-      } else {
-        if (PendingSCCWrite != Other.PendingSCCWrite)
-          PendingSCCWrite = nullptr;
+      } else if (PendingSCCWrite != Other.PendingSCCWrite) {
+        PendingSCCWrite = nullptr;
       }
     }
   }
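The collapsed `else if` keeps the merge rule for the pending SCC write intact: adopt the other bracket's record if this one saw no SCC_WRITE event, and drop to null when the two sides disagree, since the merged state can no longer name a single writer. A sketch of that rule under assumptions (stub type and signature are hypothetical):

```cpp
// Hypothetical sketch of the pending-SCC-write merge rule.
#include <cstdint>

struct MachineInstrStub {}; // stand-in for llvm::MachineInstr

void mergePendingSCCWrite(const MachineInstrStub *&PendingSCCWrite,
                          const MachineInstrStub *OtherPendingSCCWrite,
                          uint32_t OldEvents, unsigned SCCWriteBit) {
  if (!(OldEvents & (1u << SCCWriteBit)))
    PendingSCCWrite = OtherPendingSCCWrite; // no local event: adopt theirs
  else if (PendingSCCWrite != OtherPendingSCCWrite)
    PendingSCCWrite = nullptr; // conflicting writers: no unique pending write
}
```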
@@ -2635,11 +2630,10 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
   for (MachineBasicBlock *MBB : ML->blocks()) {
     for (MachineInstr &MI : *MBB) {
       if (isVMEMOrFlatVMEM(MI)) {
-        if (MI.mayLoad())
-          HasVMemLoad = true;
-        if (MI.mayStore())
-          HasVMemStore = true;
+        HasVMemLoad |= MI.mayLoad();
+        HasVMemStore |= MI.mayStore();
       }
+
       for (const MachineOperand &Op : MI.all_uses()) {
         if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
           continue;
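The final hunk replaces two branches with bitwise-or assignments, which is equivalent here because the flags are sticky: they only ever move from false to true while the loop scans instructions. A small illustration (names hypothetical):

```cpp
// Equivalent forms for sticky flags accumulated over a scan; |= folds the
// branch away without changing the result.
struct VMemScanSketch {
  bool HasVMemLoad = false;
  bool HasVMemStore = false;

  void note(bool MayLoad, bool MayStore) {
    // Before: if (MayLoad) HasVMemLoad = true; (and likewise for stores)
    HasVMemLoad |= MayLoad;
    HasVMemStore |= MayStore;
  }
};
```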