From 56b8fbf1383055b5dae1b69cf2dedc3a4fec7688 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 21 Aug 2024 16:40:20 +0100 Subject: [PATCH] [AMDGPU] Add GFX12 test coverage for vmcnt flushing in loop headers --- .../CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir index 2417becb7c2167..e51174919b8d3a 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir @@ -1,5 +1,6 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s --- @@ -20,6 +21,13 @@ # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop body: | bb.0: @@ -58,6 +66,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop_noterm +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop_noterm body: | bb.0: @@ -129,6 +144,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop_load +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop_load body: | bb.0: @@ -170,6 +192,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: 
waitcnt_vm_loop_no_store +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop_no_store body: | bb.0: @@ -212,6 +241,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10-NOT: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop_no_use +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop_no_use body: | bb.0: @@ -255,6 +291,14 @@ body: | # GFX10-LABEL: bb.1: # GFX10-NOT: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop2 +# GFX12-LABEL: bb.0: +# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop2 body: | bb.0: @@ -294,6 +338,14 @@ body: | # GFX10-LABEL: bb.1: # GFX10-NOT: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop2_store +# GFX12-LABEL: bb.0: +# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop2_store body: | bb.0: @@ -334,6 +386,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop2_use_in_loop +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop2_use_in_loop body: | bb.0: @@ -379,6 +438,15 @@ body: | # GFX10-LABEL: bb.2: # GFX10-NOT: S_WAITCNT 16 # GFX10-LABEL: bb.3: + +# GFX12-LABEL: waitcnt_vm_loop2_nowait +# GFX12-LABEL: bb.0: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.3: name: waitcnt_vm_loop2_nowait body: | bb.0: @@ -427,6 +495,14 @@ body: | # GFX10-LABEL: bb.1: # GFX10-NOT: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop2_reginterval +# 
GFX12-LABEL: bb.0: +# GFX12: GLOBAL_LOAD_DWORDX4 +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop2_reginterval body: | bb.0: @@ -467,6 +543,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop2_reginterval2 +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop2_reginterval2 body: | bb.0: @@ -513,6 +596,15 @@ body: | # GFX10-NOT: S_WAITCNT 16240 # GFX10-LABEL: bb.2: +# GFX12-LABEL: waitcnt_vm_zero +# GFX12-LABEL: bb.0: +# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN +# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: + name: waitcnt_vm_zero body: | bb.0: @@ -548,6 +640,14 @@ body: | # GFX10-LABEL: bb.1: # GFX10-NOT: S_WAITCNT +# GFX12-LABEL: waitcnt_vm_necessary +# GFX12-LABEL: bb.0: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12: $vgpr4 +# GFX12-NOT: S_WAIT +# GFX12-LABEL: bb.1: +# GFX12-NOT: S_WAIT + # GFX9-LABEL: waitcnt_vm_necessary # GFX9-LABEL: bb.0: # GFX9: S_WAITCNT 3952 @@ -590,6 +690,13 @@ body: | # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: +# GFX12-LABEL: waitcnt_vm_loop_global_mem +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: + name: waitcnt_vm_loop_global_mem body: | bb.0: @@ -631,6 +738,13 @@ body: | # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: +# GFX12-LABEL: waitcnt_vm_loop_scratch_mem +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: + name: waitcnt_vm_loop_scratch_mem body: | bb.0: @@ -671,6 +785,14 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 11 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop_flat_mem +# GFX12-LABEL: bb.0: +# GFX12: FLAT_LOAD_DWORD +# GFX12-NOT: S_WAIT_LOADCNT_DSCNT 0 +# 
GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT_DSCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop_flat_mem body: | bb.0: @@ -713,6 +835,13 @@ body: | # GFX10-LABEL: bb.1: # GFX10: S_WAITCNT 16 # GFX10-LABEL: bb.2: + +# GFX12-LABEL: waitcnt_vm_loop_flat_load +# GFX12-LABEL: bb.0: +# GFX12-NOT: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.1: +# GFX12: S_WAIT_LOADCNT 0 +# GFX12-LABEL: bb.2: name: waitcnt_vm_loop_flat_load body: | bb.0: