diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 92985f971f17a..cb29d5d947598 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1496,6 +1496,7 @@ def FeatureISAVersion12 : FeatureSet< FeatureWavefrontSize32, FeatureShaderCyclesHiLoRegisters, FeatureArchitectedFlatScratch, + FeatureArchitectedSGPRs, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicDsPkAdd16Insts, diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll index 9965d214cc9b3..380a13ed16128 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll @@ -41,30 +41,30 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() { ; ; GFX12-LABEL: indirect_call_known_no_special_inputs: ; GFX12: ; %bb.0: ; %bb +; GFX12-NEXT: s_getpc_b64 s[2:3] +; GFX12-NEXT: s_sext_i32_i16 s3, s3 +; GFX12-NEXT: s_add_co_u32 s2, s2, snork@gotpcrel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s3, s3, snork@gotpcrel32@hi+16 +; GFX12-NEXT: s_mov_b64 s[0:1], 0 ; GFX12-NEXT: s_getpc_b64 s[4:5] ; GFX12-NEXT: s_sext_i32_i16 s5, s5 -; GFX12-NEXT: s_add_co_u32 s4, s4, snork@gotpcrel32@lo+8 -; GFX12-NEXT: s_add_co_ci_u32 s5, s5, snork@gotpcrel32@hi+16 -; GFX12-NEXT: s_mov_b64 s[2:3], 0 -; GFX12-NEXT: s_getpc_b64 s[6:7] -; GFX12-NEXT: s_sext_i32_i16 s7, s7 -; GFX12-NEXT: s_add_co_u32 s6, s6, wobble@gotpcrel32@lo+8 -; GFX12-NEXT: s_add_co_ci_u32 s7, s7, wobble@gotpcrel32@hi+16 -; GFX12-NEXT: s_load_u8 s1, s[2:3], 0x0 +; GFX12-NEXT: s_add_co_u32 s4, s4, wobble@gotpcrel32@lo+8 +; GFX12-NEXT: s_add_co_ci_u32 s5, s5, wobble@gotpcrel32@hi+16 +; GFX12-NEXT: s_load_u8 s6, s[0:1], 0x0 +; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 ; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 -; GFX12-NEXT: s_load_b64 s[4:5], s[6:7], 0x0 ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0 ; GFX12-NEXT: v_mov_b32_e32 v31, v0 +; GFX12-NEXT: s_mov_b32 s12, ttmp9 ; GFX12-NEXT: s_mov_b64 s[8:9], 0 -; GFX12-NEXT: s_mov_b32 s12, s0 ; GFX12-NEXT: s_mov_b32 s32, 0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_and_b32 s1, 1, s1 +; GFX12-NEXT: s_and_b32 s4, 1, s6 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_cmp_eq_u32 s1, 1 -; GFX12-NEXT: s_cselect_b32 s3, s5, s3 -; GFX12-NEXT: s_cselect_b32 s2, s4, s2 -; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3] +; GFX12-NEXT: s_cmp_eq_u32 s4, 1 +; GFX12-NEXT: s_cselect_b32 s1, s3, s1 +; GFX12-NEXT: s_cselect_b32 s0, s2, s0 +; GFX12-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX12-NEXT: s_endpgm bb: diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll index c732ff7094255..495b54758de04 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_cs void @_amdgpu_cs_main() { ; GFX9-SDAG-LABEL: _amdgpu_cs_main: @@ -23,6 +25,30 @@ define amdgpu_cs void @_amdgpu_cs_main() { ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: _amdgpu_cs_main: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_nop 0 +; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: _amdgpu_cs_main: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_nop 0 +; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-GISEL-NEXT: s_endpgm .entry: %idx = call i32 @llvm.amdgcn.workgroup.id.x() %idy = call i32 @llvm.amdgcn.workgroup.id.y() @@ -68,6 +94,24 @@ define amdgpu_cs void @caller() { ; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] ; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: caller: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: caller: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-GISEL-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() call amdgpu_gfx void @callee(i32 %idx) ret void @@ -79,3 +123,6 @@ declare i32 @llvm.amdgcn.workgroup.id.x() declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX12: {{.*}} +; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll index c492b54759d82..769e6b0964abd 100644 --- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll @@ -1,25 +1,47 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { -; GCN-SDAG-LABEL: workgroup_id_x: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-SDAG-NEXT: s_endpgm +; GFX9-SDAG-LABEL: workgroup_id_x: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-SDAG-NEXT: s_endpgm ; -; GCN-GISEL-LABEL: workgroup_id_x: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GCN-GISEL-NEXT: s_endpgm +; GFX9-GISEL-LABEL: workgroup_id_x: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: workgroup_id_x: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-SDAG-NEXT: s_nop 0 +; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: workgroup_id_x: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX12-GISEL-NEXT: s_nop 0 +; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-GISEL-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %idx, ptr addrspace(1) %ptrx @@ -27,27 +49,29 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { } define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) { -; GCN-SDAG-LABEL: workgroup_id_xy: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp7 -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3] -; GCN-SDAG-NEXT: s_endpgm +; GFX9-LABEL: workgroup_id_xy: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7 +; GFX9-NEXT: global_store_dword v0, v1, s[2:3] +; GFX9-NEXT: s_endpgm ; -; GCN-GISEL-LABEL: workgroup_id_xy: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GCN-GISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp7 -; GCN-GISEL-NEXT: global_store_dword v0, v1, s[2:3] -; GCN-GISEL-NEXT: s_endpgm +; GFX12-LABEL: workgroup_id_xy: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9 +; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX12-NEXT: global_store_b32 v0, v2, s[2:3] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %idx, ptr addrspace(1) %ptrx %idy = call i32 @llvm.amdgcn.workgroup.id.y() @@ -57,37 +81,56 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace } define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) { -; GCN-SDAG-LABEL: workgroup_id_xyz: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 -; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1] -; GCN-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GCN-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3] -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0 -; GCN-SDAG-NEXT: global_store_dword v0, v1, s[6:7] -; GCN-SDAG-NEXT: s_endpgm +; GFX9-SDAG-LABEL: workgroup_id_xyz: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3] +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: workgroup_id_xyz: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3] +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7] +; GFX9-GISEL-NEXT: s_endpgm ; -; GCN-GISEL-LABEL: workgroup_id_xyz: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GCN-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GCN-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[2:3] -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GCN-GISEL-NEXT: global_store_dword v1, v0, s[6:7] -; GCN-GISEL-NEXT: s_endpgm +; GFX12-LABEL: workgroup_id_xyz: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0 +; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10 +; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9 +; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_clause 0x2 +; GFX12-NEXT: global_store_b32 v0, v1, s[4:5] +; GFX12-NEXT: global_store_b32 v0, v2, s[6:7] +; GFX12-NEXT: global_store_b32 v0, v3, s[0:1] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm %idx = call i32 @llvm.amdgcn.workgroup.id.x() store i32 %idx, ptr addrspace(1) %ptrx %idy = call i32 @llvm.amdgcn.workgroup.id.y()