From fa2dccb377d0b712223efe5b62e5fc633580a9e6 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 23 Aug 2024 10:31:33 +0100 Subject: [PATCH] [AMDGPU] Remove one case of vmcnt loop header flushing for GFX12 (#105550) When a loop contains a VMEM load whose result is only used outside the loop, do not bother to flush vmcnt in the loop head on GFX12. A wait for vmcnt will be required inside the loop anyway, because VMEM instructions can write their VGPR results out of order. --- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +- llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 4262e7b5d9c250..eafe20be17d5b9 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -2390,7 +2390,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML, } if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside) return true; - return HasVMemLoad && UsesVgprLoadedOutside; + return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder(); } bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir index bdef55ab956a01..0ddd2aa285b264 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir @@ -295,7 +295,7 @@ body: | # GFX12-LABEL: waitcnt_vm_loop2 # GFX12-LABEL: bb.0: # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN -# GFX12: S_WAIT_LOADCNT 0 +# GFX12-NOT: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.1: # GFX12: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.2: @@ -342,7 +342,7 @@ body: | # GFX12-LABEL: waitcnt_vm_loop2_store # GFX12-LABEL: bb.0: # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN -# GFX12: S_WAIT_LOADCNT 0 +# GFX12-NOT: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.1: # GFX12: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.2: @@ -499,9 +499,9 @@ body: | # GFX12-LABEL: waitcnt_vm_loop2_reginterval # GFX12-LABEL: bb.0: # GFX12: GLOBAL_LOAD_DWORDX4 -# GFX12: S_WAIT_LOADCNT 0 -# GFX12-LABEL: bb.1: # GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.2: name: waitcnt_vm_loop2_reginterval body: | @@ -600,7 +600,7 @@ body: | # GFX12-LABEL: bb.0: # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN -# GFX12: S_WAIT_LOADCNT 0 +# GFX12-NOT: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.1: # GFX12: S_WAIT_LOADCNT 0 # GFX12-LABEL: bb.2: