diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 245731ad5fc7c9..acb54fd10b90dc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -612,13 +612,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // Reserve null register - it shall never be allocated
   reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
 
-  // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
-  // will result in bugs.
-  if (isWave32) {
-    Reserved.set(AMDGPU::VCC);
-    Reserved.set(AMDGPU::VCC_HI);
-  }
-
   // Reserve SGPRs.
   //
   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index bf4302c156d83d..4c9c34de7194ce 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -38342,12 +38342,11 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX10-NEXT:    v_and_b32_e32 v2, 1, v2
 ; GFX10-NEXT:    v_and_b32_e32 v4, 1, v4
 ; GFX10-NEXT:    v_and_b32_e32 v6, 1, v6
-; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX10-NEXT:    v_and_b32_e32 v8, 1, v8
 ; GFX10-NEXT:    v_and_b32_e32 v10, 1, v10
+; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
 ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
 ; GFX10-NEXT:    v_and_b32_e32 v3, 1, v3
-; GFX10-NEXT:    v_writelane_b32 v40, s34, 2
 ; GFX10-NEXT:    v_and_b32_e32 v5, 1, v5
 ; GFX10-NEXT:    v_and_b32_e32 v7, 1, v7
 ; GFX10-NEXT:    v_and_b32_e32 v9, 1, v9
@@ -38366,7 +38365,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s17, 1, v4
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s18, 1, v2
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s19, 1, v0
-; GFX10-NEXT:    v_writelane_b32 v40, s35, 3
+; GFX10-NEXT:    v_writelane_b32 v40, s34, 2
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s20, 1, v27
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s21, 1, v25
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s22, 1, v23
@@ -38377,10 +38376,10 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s27, 1, v13
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s28, 1, v11
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s29, 1, v7
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s30, 1, v3
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s31, 1, v1
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s34, 1, v5
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s35, 1, v9
+; GFX10-NEXT:    v_cmp_eq_u32_e64 vcc_hi, 1, v3
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s30, 1, v1
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s31, 1, v5
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s34, 1, v9
 ; GFX10-NEXT:    s_waitcnt vmcnt(32)
 ; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v31
 ; GFX10-NEXT:    s_waitcnt vmcnt(31)
@@ -38460,10 +38459,10 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v29, v39, s27
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v28, v26, s28
 ; GFX10-NEXT:    v_cndmask_b32_e64 v20, v51, v20, s29
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, v14, v12, s31
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v55, v16, s30
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v53, v18, s34
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v24, v22, s35
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v14, v12, s30
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v55, v16, vcc_hi
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v53, v18, s31
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v24, v22, s34
 ; GFX10-NEXT:    v_cndmask_b32_e64 v16, v4, v3, s4
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v64, 0x5040100
 ; GFX10-NEXT:    v_perm_b32 v1, v1, v54, 0x5040100
@@ -38481,7 +38480,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX10-NEXT:    v_perm_b32 v13, v66, v13, 0x5040100
 ; GFX10-NEXT:    v_perm_b32 v14, v65, v17, 0x5040100
 ; GFX10-NEXT:    v_perm_b32 v15, v16, v15, 0x5040100
-; GFX10-NEXT:    v_readlane_b32 s35, v40, 3
 ; GFX10-NEXT:    v_readlane_b32 s34, v40, 2
 ; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index ec3c08ec795235..da64c379672ef7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -1259,17 +1259,17 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
 ; GFX10SELDAG-LABEL: isnan_v4f16:
 ; GFX10SELDAG:       ; %bb.0:
 ; GFX10SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10SELDAG-NEXT:    v_mov_b32_e32 v2, 3
-; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s5, v0, 3
-; GFX10SELDAG-NEXT:    v_cmp_class_f16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD
-; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s5
-; GFX10SELDAG-NEXT:    v_cmp_class_f16_sdwa s5, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
+; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s4, v0, 3
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v3, 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s4
+; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s4, v1, 3
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10SELDAG-NEXT:    v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, v5
+; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s4
+; GFX10SELDAG-NEXT:    v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD
+; GFX10SELDAG-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s4
-; GFX10SELDAG-NEXT:    v_mov_b32_e32 v0, v4
-; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s5
-; GFX10SELDAG-NEXT:    v_cmp_class_f16_e64 s5, v1, 3
-; GFX10SELDAG-NEXT:    v_mov_b32_e32 v1, v5
-; GFX10SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s5
 ; GFX10SELDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10GLISEL-LABEL: isnan_v4f16:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index ab6a9dcf71acef..a87fa8bf36d9e7 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -7404,35 +7404,35 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) %
 ; GFX12-NEXT:    v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v9, s31
 ; GFX12-NEXT:    v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s35
 ; GFX12-NEXT:    v_dual_mov_b32 v10, s34 :: v_dual_mov_b32 v3, s5
-; GFX12-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x100000
-; GFX12-NEXT:    s_lshr_b32 s12, s0, 16
-; GFX12-NEXT:    s_mov_b32 s14, s1
-; GFX12-NEXT:    s_lshr_b32 s16, s1, 16
-; GFX12-NEXT:    s_bfe_i64 s[0:1], s[2:3], 0x100000
+; GFX12-NEXT:    s_bfe_i64 s[16:17], s[2:3], 0x100000
 ; GFX12-NEXT:    s_lshr_b32 s2, s2, 16
 ; GFX12-NEXT:    s_bfe_i64 s[6:7], s[6:7], 0x100000
 ; GFX12-NEXT:    v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v5, s23
 ; GFX12-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v13, s25
+; GFX12-NEXT:    s_mov_b32 s12, s1
+; GFX12-NEXT:    s_lshr_b32 s14, s1, 16
 ; GFX12-NEXT:    s_bfe_i64 s[18:19], s[18:19], 0x100000
 ; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x100000
 ; GFX12-NEXT:    v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v15, s27
 ; GFX12-NEXT:    v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v7, s7
+; GFX12-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x100000
+; GFX12-NEXT:    s_lshr_b32 s0, s0, 16
 ; GFX12-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x100000
 ; GFX12-NEXT:    v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v17, s19
+; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
 ; GFX12-NEXT:    s_bfe_i64 s[14:15], s[14:15], 0x100000
-; GFX12-NEXT:    s_bfe_i64 s[16:17], s[16:17], 0x100000
 ; GFX12-NEXT:    v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s21
 ; GFX12-NEXT:    v_mov_b32_e32 v18, s20
-; GFX12-NEXT:    s_bfe_i64 s[12:13], s[12:13], 0x100000
+; GFX12-NEXT:    s_bfe_i64 s[0:1], s[0:1], 0x100000
 ; GFX12-NEXT:    s_clause 0x1
 ; GFX12-NEXT:    global_store_b128 v24, v[8:11], s[8:9] offset:80
 ; GFX12-NEXT:    global_store_b128 v24, v[0:3], s[8:9] offset:64
-; GFX12-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
+; GFX12-NEXT:    v_dual_mov_b32 v1, s17 :: v_dual_mov_b32 v0, s16
 ; GFX12-NEXT:    v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v2, s2
-; GFX12-NEXT:    v_dual_mov_b32 v9, s15 :: v_dual_mov_b32 v8, s14
-; GFX12-NEXT:    v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16
+; GFX12-NEXT:    v_dual_mov_b32 v9, s13 :: v_dual_mov_b32 v8, s12
+; GFX12-NEXT:    v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v10, s14
 ; GFX12-NEXT:    v_dual_mov_b32 v21, s11 :: v_dual_mov_b32 v20, s10
-; GFX12-NEXT:    v_dual_mov_b32 v23, s13 :: v_dual_mov_b32 v22, s12
+; GFX12-NEXT:    v_dual_mov_b32 v23, s1 :: v_dual_mov_b32 v22, s0
 ; GFX12-NEXT:    s_clause 0x5
 ; GFX12-NEXT:    global_store_b128 v24, v[12:15], s[8:9] offset:112
 ; GFX12-NEXT:    global_store_b128 v24, v[4:7], s[8:9] offset:96
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index 952827b8cd0e71..889755c23bbc72 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -8808,73 +8808,73 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
 ; GFX12-NEXT:    v_lshrrev_b16 v2, 8, s6
 ; GFX12-NEXT:    v_lshrrev_b16 v4, 8, s5
 ; GFX12-NEXT:    v_lshrrev_b16 v8, 8, s2
-; GFX12-NEXT:    s_lshr_b32 s24, s7, 16
+; GFX12-NEXT:    s_lshr_b32 s22, s7, 16
 ; GFX12-NEXT:    v_bfe_i32 v31, v1, 0, 8
-; GFX12-NEXT:    s_lshr_b32 s42, s2, 24
-; GFX12-NEXT:    s_mov_b32 s48, s7
+; GFX12-NEXT:    s_lshr_b32 s40, s2, 24
+; GFX12-NEXT:    s_mov_b32 s46, s7
 ; GFX12-NEXT:    v_lshrrev_b16 v5, 8, s4
 ; GFX12-NEXT:    v_lshrrev_b16 v7, 8, s1
-; GFX12-NEXT:    s_lshr_b32 s26, s6, 16
-; GFX12-NEXT:    s_lshr_b32 s44, s1, 16
+; GFX12-NEXT:    s_lshr_b32 s24, s6, 16
+; GFX12-NEXT:    s_lshr_b32 s42, s1, 16
 ; GFX12-NEXT:    s_ashr_i64 s[58:59], s[6:7], 56
-; GFX12-NEXT:    s_bfe_i64 s[48:49], s[48:49], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[46:47], s[46:47], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[22:23], s[22:23], 0x80000
 ; GFX12-NEXT:    v_lshrrev_b16 v6, 8, s3
 ; GFX12-NEXT:    v_lshrrev_b16 v3, 8, s0
-; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s24
-; GFX12-NEXT:    s_lshr_b32 s28, s6, 24
-; GFX12-NEXT:    s_lshr_b32 s30, s5, 16
-; GFX12-NEXT:    s_lshr_b32 s40, s2, 16
+; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s22
+; GFX12-NEXT:    s_lshr_b32 s26, s6, 24
+; GFX12-NEXT:    s_lshr_b32 s28, s5, 16
+; GFX12-NEXT:    s_lshr_b32 s38, s2, 16
 ; GFX12-NEXT:    v_bfe_i32 v11, v8, 0, 8
 ; GFX12-NEXT:    v_bfe_i32 v23, v4, 0, 8
 ; GFX12-NEXT:    v_bfe_i32 v27, v2, 0, 8
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v32, 31, v31
-; GFX12-NEXT:    s_bfe_i64 s[44:45], s[44:45], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
-; GFX12-NEXT:    v_dual_mov_b32 v34, s25 :: v_dual_mov_b32 v35, s58
-; GFX12-NEXT:    v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s26
-; GFX12-NEXT:    v_dual_mov_b32 v56, s43 :: v_dual_mov_b32 v29, s48
-; GFX12-NEXT:    v_mov_b32_e32 v30, s49
-; GFX12-NEXT:    s_lshr_b32 s46, s0, 24
-; GFX12-NEXT:    s_mov_b32 s50, s5
-; GFX12-NEXT:    s_mov_b32 s52, s3
-; GFX12-NEXT:    s_lshr_b32 s34, s4, 16
-; GFX12-NEXT:    s_lshr_b32 s36, s4, 24
-; GFX12-NEXT:    s_ashr_i64 s[22:23], s[2:3], 56
+; GFX12-NEXT:    s_bfe_i64 s[42:43], s[42:43], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[24:25], s[24:25], 0x80000
+; GFX12-NEXT:    v_dual_mov_b32 v34, s23 :: v_dual_mov_b32 v35, s58
+; GFX12-NEXT:    v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s24
+; GFX12-NEXT:    v_dual_mov_b32 v56, s41 :: v_dual_mov_b32 v29, s46
+; GFX12-NEXT:    v_mov_b32_e32 v30, s47
+; GFX12-NEXT:    s_lshr_b32 s44, s0, 24
+; GFX12-NEXT:    s_mov_b32 s48, s5
+; GFX12-NEXT:    s_mov_b32 s50, s3
+; GFX12-NEXT:    s_lshr_b32 s30, s4, 16
+; GFX12-NEXT:    s_lshr_b32 s34, s4, 24
+; GFX12-NEXT:    s_ashr_i64 s[54:55], s[2:3], 56
 ; GFX12-NEXT:    s_ashr_i64 s[56:57], s[4:5], 56
 ; GFX12-NEXT:    v_bfe_i32 v7, v7, 0, 8
 ; GFX12-NEXT:    v_bfe_i32 v19, v5, 0, 8
-; GFX12-NEXT:    s_bfe_i64 s[40:41], s[40:41], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x80000
 ; GFX12-NEXT:    s_bfe_i64 s[28:29], s[28:29], 0x80000
-; GFX12-NEXT:    s_lshr_b32 s38, s3, 16
-; GFX12-NEXT:    s_mov_b32 s54, s1
+; GFX12-NEXT:    s_bfe_i64 s[26:27], s[26:27], 0x80000
+; GFX12-NEXT:    s_lshr_b32 s36, s3, 16
+; GFX12-NEXT:    s_mov_b32 s52, s1
 ; GFX12-NEXT:    s_bfe_i64 s[12:13], s[2:3], 0x80000
 ; GFX12-NEXT:    s_bfe_i64 s[14:15], s[4:5], 0x80000
 ; GFX12-NEXT:    s_bfe_i64 s[16:17], s[6:7], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[2:3], s[52:53], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[4:5], s[50:51], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[6:7], s[46:47], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[2:3], s[50:51], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[4:5], s[48:49], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[6:7], s[44:45], 0x80000
 ; GFX12-NEXT:    s_lshr_b32 s20, s0, 16
 ; GFX12-NEXT:    s_ashr_i64 s[18:19], s[0:1], 56
 ; GFX12-NEXT:    v_bfe_i32 v3, v3, 0, 8
 ; GFX12-NEXT:    v_bfe_i32 v15, v6, 0, 8
-; GFX12-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x80000
 ; GFX12-NEXT:    s_bfe_i64 s[34:35], s[34:35], 0x80000
-; GFX12-NEXT:    v_dual_mov_b32 v38, s27 :: v_dual_mov_b32 v39, s28
-; GFX12-NEXT:    v_dual_mov_b32 v40, s29 :: v_dual_mov_b32 v41, s30
-; GFX12-NEXT:    v_dual_mov_b32 v42, s31 :: v_dual_mov_b32 v43, s56
-; GFX12-NEXT:    v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s34
-; GFX12-NEXT:    v_dual_mov_b32 v52, s23 :: v_dual_mov_b32 v53, s40
-; GFX12-NEXT:    v_dual_mov_b32 v54, s41 :: v_dual_mov_b32 v55, s42
+; GFX12-NEXT:    s_bfe_i64 s[30:31], s[30:31], 0x80000
+; GFX12-NEXT:    v_dual_mov_b32 v38, s25 :: v_dual_mov_b32 v39, s26
+; GFX12-NEXT:    v_dual_mov_b32 v40, s27 :: v_dual_mov_b32 v41, s28
+; GFX12-NEXT:    v_dual_mov_b32 v42, s29 :: v_dual_mov_b32 v43, s56
+; GFX12-NEXT:    v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s30
+; GFX12-NEXT:    v_dual_mov_b32 v52, s55 :: v_dual_mov_b32 v53, s38
+; GFX12-NEXT:    v_dual_mov_b32 v54, s39 :: v_dual_mov_b32 v55, s40
 ; GFX12-NEXT:    s_bfe_i64 s[10:11], s[0:1], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[0:1], s[54:55], 0x80000
+; GFX12-NEXT:    s_bfe_i64 s[0:1], s[52:53], 0x80000
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v12, 31, v11
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v24, 31, v23
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v28, 31, v27
 ; GFX12-NEXT:    global_store_b128 v0, v[33:36], s[8:9] offset:240
-; GFX12-NEXT:    v_mov_b32_e32 v33, s44
+; GFX12-NEXT:    v_mov_b32_e32 v33, s42
 ; GFX12-NEXT:    global_store_b128 v0, v[29:32], s[8:9] offset:224
 ; GFX12-NEXT:    v_dual_mov_b32 v25, s16 :: v_dual_mov_b32 v26, s17
 ; GFX12-NEXT:    v_dual_mov_b32 v32, s7 :: v_dual_mov_b32 v21, s4
@@ -8882,16 +8882,16 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o
 ; GFX12-NEXT:    v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v9, s12
 ; GFX12-NEXT:    v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v5, s0
 ; GFX12-NEXT:    s_bfe_i64 s[20:21], s[20:21], 0x80000
-; GFX12-NEXT:    s_bfe_i64 s[38:39], s[38:39], 0x80000
-; GFX12-NEXT:    v_dual_mov_b32 v46, s35 :: v_dual_mov_b32 v47, s36
-; GFX12-NEXT:    v_dual_mov_b32 v48, s37 :: v_dual_mov_b32 v49, s38
-; GFX12-NEXT:    v_dual_mov_b32 v34, s45 :: v_dual_mov_b32 v35, s18
+; GFX12-NEXT:    s_bfe_i64 s[36:37], s[36:37], 0x80000
+; GFX12-NEXT:    v_dual_mov_b32 v46, s31 :: v_dual_mov_b32 v47, s34
+; GFX12-NEXT:    v_dual_mov_b32 v48, s35 :: v_dual_mov_b32 v49, s36
+; GFX12-NEXT:    v_dual_mov_b32 v34, s43 :: v_dual_mov_b32 v35, s18
 ; GFX12-NEXT:    v_dual_mov_b32 v36, s19 :: v_dual_mov_b32 v29, s20
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v8, 31, v7
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v20, 31, v19
 ; GFX12-NEXT:    v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v13, s2
 ; GFX12-NEXT:    v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v1, s10
-; GFX12-NEXT:    v_dual_mov_b32 v50, s39 :: v_dual_mov_b32 v51, s22
+; GFX12-NEXT:    v_dual_mov_b32 v50, s37 :: v_dual_mov_b32 v51, s54
 ; GFX12-NEXT:    v_dual_mov_b32 v30, s21 :: v_dual_mov_b32 v31, s6
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; GFX12-NEXT:    v_ashrrev_i32_e32 v16, 31, v15
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index f8e7cb397b475e..8a5f75332557e6 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -28,18 +28,17 @@ body: |
   ; GCN-LABEL: name: test_main
   ; GCN: bb.0:
   ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   $sgpr0 = COPY $sgpr33
+  ; GCN-NEXT:   $vcc_hi = frame-setup COPY $sgpr33
   ; GCN-NEXT:   $sgpr33 = frame-setup COPY $sgpr32
-  ; GCN-NEXT:   $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr1
-  ; GCN-NEXT:   $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr5
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
+  ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
   ; GCN-NEXT:   renamable $vgpr2 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr3
@@ -116,18 +115,18 @@ body: |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.1:
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
-  ; GCN-NEXT:   liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
-  ; GCN-NEXT:   liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.3:
-  ; GCN-NEXT:   liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5
+  ; GCN-NEXT:   liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 3
   ; GCN-NEXT:   $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 2
@@ -198,16 +197,15 @@ body: |
   ; GCN-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
   ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
   ; GCN-NEXT:   KILL killed renamable $vgpr2
-  ; GCN-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 4
-  ; GCN-NEXT:   $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
   ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
   ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
   ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr1
+  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
+  ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
-  ; GCN-NEXT:   $sgpr33 = COPY $sgpr0
+  ; GCN-NEXT:   $sgpr33 = frame-destroy COPY $vcc_hi
   ; GCN-NEXT:   S_ENDPGM 0
   bb.0:
     liveins: $vgpr0
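
Note (commentary, not part of the patch): the test churn above is mechanical fallout from the SIRegisterInfo.cpp change. With vcc_hi no longer reserved in wave32 mode, the register allocator can use it for a 32-bit condition mask, so functions with many simultaneously live masks need one fewer callee-saved SGPR. That is why bf16.ll drops the v_writelane_b32/v_readlane_b32 pair for s35 and the remaining mask registers shift down (s30/s31/s34/s35 becomes vcc_hi/s30/s31/s34), and why the MIR test's prolog now stashes the old $sgpr33 in $vcc_hi instead of burning $sgpr0. An illustrative reproducer (a sketch mirroring the v_vselect_v32bf16 test above; the function name is made up, and whether a mask actually lands in vcc_hi depends on allocation order), assuming a wave32 target such as gfx1030:

; Enough i1 masks live at once to run past vcc_lo and the low SGPRs;
; after this change one of them may be assigned to vcc_hi.
; Compile with: llc -mtriple=amdgcn -mcpu=gfx1030 -o -
define <32 x bfloat> @vselect_many_masks(<32 x i1> %cond, <32 x bfloat> %a, <32 x bfloat> %b) {
  %r = select <32 x i1> %cond, <32 x bfloat> %a, <32 x bfloat> %b
  ret <32 x bfloat> %r
}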