diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 76bbb30b85a78..4651d7d9d3adf 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6304,11 +6304,7 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } case AMDGPU::SI_INDIRECT_SRC_V1: case AMDGPU::SI_INDIRECT_SRC_V2: - case AMDGPU::SI_INDIRECT_SRC_V3: case AMDGPU::SI_INDIRECT_SRC_V4: - case AMDGPU::SI_INDIRECT_SRC_V5: - case AMDGPU::SI_INDIRECT_SRC_V6: - case AMDGPU::SI_INDIRECT_SRC_V7: case AMDGPU::SI_INDIRECT_SRC_V8: case AMDGPU::SI_INDIRECT_SRC_V9: case AMDGPU::SI_INDIRECT_SRC_V10: @@ -6319,11 +6315,7 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, return emitIndirectSrc(MI, *BB, *getSubtarget()); case AMDGPU::SI_INDIRECT_DST_V1: case AMDGPU::SI_INDIRECT_DST_V2: - case AMDGPU::SI_INDIRECT_DST_V3: case AMDGPU::SI_INDIRECT_DST_V4: - case AMDGPU::SI_INDIRECT_DST_V5: - case AMDGPU::SI_INDIRECT_DST_V6: - case AMDGPU::SI_INDIRECT_DST_V7: case AMDGPU::SI_INDIRECT_DST_V8: case AMDGPU::SI_INDIRECT_DST_V9: case AMDGPU::SI_INDIRECT_DST_V10: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 643b2463344e5..984d1a4db4cd6 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -969,11 +969,7 @@ class SI_INDIRECT_DST : VPseudoInstSI < def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC; -def SI_INDIRECT_SRC_V3 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC; -def SI_INDIRECT_SRC_V5 : SI_INDIRECT_SRC; -def SI_INDIRECT_SRC_V6 : SI_INDIRECT_SRC; -def SI_INDIRECT_SRC_V7 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V9 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V10 : SI_INDIRECT_SRC; @@ -984,11 +980,7 @@ def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC; def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST; -def SI_INDIRECT_DST_V3 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST; -def SI_INDIRECT_DST_V5 : SI_INDIRECT_DST; -def SI_INDIRECT_DST_V6 : SI_INDIRECT_DST; -def SI_INDIRECT_DST_V7 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V9 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V10 : SI_INDIRECT_DST; @@ -2787,11 +2779,7 @@ multiclass SI_INDIRECT_Pattern { } defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern; defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern; -defm : SI_INDIRECT_Pattern; -defm : SI_INDIRECT_Pattern; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; @@ -2801,11 +2789,7 @@ defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern; defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern; -defm : SI_INDIRECT_Pattern; -defm : SI_INDIRECT_Pattern; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll index 4b340f308d5f6..c69b0cce3d208 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -O0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck --check-prefixes=GCN-O0 %s define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-LABEL: float4_extelt: @@ -21,30 +20,6 @@ define amdgpu_kernel void @float4_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float4_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 4.0 -; GCN-O0-NEXT: s_mov_b32 s4, 2.0 -; GCN-O0-NEXT: s_mov_b32 s5, 1.0 -; GCN-O0-NEXT: s_mov_b32 s6, 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s3 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <4 x float> , i32 %sel store float %ext, ptr addrspace(1) %out @@ -68,30 +43,6 @@ define amdgpu_kernel void @int4_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: int4_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 4 -; GCN-O0-NEXT: s_mov_b32 s4, 2 -; GCN-O0-NEXT: s_mov_b32 s5, 1 -; GCN-O0-NEXT: s_mov_b32 s6, 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s3 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <4 x i32> , i32 %sel store i32 %ext, ptr addrspace(1) %out @@ -121,72 +72,6 @@ define amdgpu_kernel void @double4_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double4_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x40100a3d -; GCN-O0-NEXT: s_mov_b32 s4, 0x70a3d70a -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s3 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s12, s4 -; GCN-O0-NEXT: s_mov_b32 s6, 0x4000147a -; GCN-O0-NEXT: s_mov_b32 s4, 0xe147ae14 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s13, s5 -; GCN-O0-NEXT: s_mov_b32 s14, s4 -; GCN-O0-NEXT: s_mov_b32 s6, 0x3ff028f5 -; GCN-O0-NEXT: s_mov_b32 s4, 0xc28f5c29 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s15, s5 -; GCN-O0-NEXT: s_mov_b32 s16, s4 -; GCN-O0-NEXT: s_mov_b32 s6, 0x3f847ae1 -; GCN-O0-NEXT: s_mov_b32 s4, 0x47ae147b -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s17, s5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s16 -; GCN-O0-NEXT: s_mov_b32 s7, s15 -; GCN-O0-NEXT: s_mov_b32 s8, s14 -; GCN-O0-NEXT: s_mov_b32 s9, s13 -; GCN-O0-NEXT: s_mov_b32 s10, s12 -; GCN-O0-NEXT: s_mov_b32 s11, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s11 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s11 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <4 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -224,113 +109,6 @@ define amdgpu_kernel void @double5_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double5_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x40140a3d -; GCN-O0-NEXT: s_mov_b32 s4, 0x70a3d70a -; GCN-O0-NEXT: s_mov_b32 s6, s4 -; GCN-O0-NEXT: s_mov_b32 s7, s3 -; GCN-O0-NEXT: s_mov_b32 s25, s7 -; GCN-O0-NEXT: s_mov_b32 s26, s6 -; GCN-O0-NEXT: s_mov_b32 s3, 0x40100a3d -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s3 -; GCN-O0-NEXT: s_mov_b32 s27, s5 -; GCN-O0-NEXT: s_mov_b32 s28, s4 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4000147a -; GCN-O0-NEXT: s_mov_b32 s4, 0xe147ae14 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s3 -; GCN-O0-NEXT: s_mov_b32 s29, s5 -; GCN-O0-NEXT: s_mov_b32 s30, s4 -; GCN-O0-NEXT: s_mov_b32 s3, 0x3ff028f5 -; GCN-O0-NEXT: s_mov_b32 s4, 0xc28f5c29 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s3 -; GCN-O0-NEXT: s_mov_b32 s31, s5 -; GCN-O0-NEXT: s_mov_b32 s33, s4 -; GCN-O0-NEXT: s_mov_b32 s3, 0x3f847ae1 -; GCN-O0-NEXT: s_mov_b32 s4, 0x47ae147b -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s3 -; GCN-O0-NEXT: s_mov_b32 s34, s5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 -; GCN-O0-NEXT: ; implicit-def: $sgpr24 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr23 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr22 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr21 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr20 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 -; GCN-O0-NEXT: s_mov_b32 s5, s34 -; GCN-O0-NEXT: s_mov_b32 s6, s33 -; GCN-O0-NEXT: s_mov_b32 s7, s31 -; GCN-O0-NEXT: s_mov_b32 s8, s30 -; GCN-O0-NEXT: s_mov_b32 s9, s29 -; GCN-O0-NEXT: s_mov_b32 s10, s28 -; GCN-O0-NEXT: s_mov_b32 s11, s27 -; GCN-O0-NEXT: s_mov_b32 s12, s26 -; GCN-O0-NEXT: s_mov_b32 s13, s25 -; GCN-O0-NEXT: s_mov_b32 s14, s24 -; GCN-O0-NEXT: s_mov_b32 s15, s23 -; GCN-O0-NEXT: s_mov_b32 s16, s22 -; GCN-O0-NEXT: s_mov_b32 s17, s21 -; GCN-O0-NEXT: s_mov_b32 s18, s20 -; GCN-O0-NEXT: s_mov_b32 s19, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s19 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s19 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <5 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -352,25 +130,6 @@ define amdgpu_kernel void @half4_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_short v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: half4_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s4, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s5, 0x44004200 -; GCN-O0-NEXT: s_mov_b32 s0, 0x40003c00 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s5, 4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s4, s4, s5 -; GCN-O0-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_short v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <4 x half> , i32 %sel store half %ext, ptr addrspace(1) %out @@ -390,24 +149,6 @@ define amdgpu_kernel void @float2_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float2_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 1.0 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s3 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v2 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <2 x float> , i32 %sel store float %ext, ptr addrspace(1) %out @@ -431,48 +172,6 @@ define amdgpu_kernel void @double2_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double2_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x3ff028f5 -; GCN-O0-NEXT: s_mov_b32 s4, 0xc28f5c29 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s3 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s8, s4 -; GCN-O0-NEXT: s_mov_b32 s6, 0x3f847ae1 -; GCN-O0-NEXT: s_mov_b32 s4, 0x47ae147b -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s9, s5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 -; GCN-O0-NEXT: s_mov_b32 s5, s9 -; GCN-O0-NEXT: s_mov_b32 s6, s8 -; GCN-O0-NEXT: s_mov_b32 s7, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s7 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <2 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -518,60 +217,6 @@ define amdgpu_kernel void @half8_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_short v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: half8_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4000 -; GCN-O0-NEXT: s_mov_b32 s6, 0x3c00 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-O0-NEXT: s_mov_b32 s3, 2 -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4200 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-O0-NEXT: s_mov_b32 s3, 3 -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4400 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-O0-NEXT: s_mov_b32 s3, 4 -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4500 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-O0-NEXT: s_mov_b32 s3, 5 -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4600 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-O0-NEXT: s_mov_b32 s3, 6 -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s3, 0x4700 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-O0-NEXT: s_mov_b32 s3, 7 -; GCN-O0-NEXT: s_cmp_eq_u32 s2, s3 -; GCN-O0-NEXT: s_cselect_b64 s[2:3], -1, 0 -; GCN-O0-NEXT: s_mov_b32 s4, 0x4800 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: v_cndmask_b32_e64 v2, v0, v1, s[2:3] -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_short v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <8 x half> , i32 %sel store half %ext, ptr addrspace(1) %out @@ -603,39 +248,6 @@ define amdgpu_kernel void @short8_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_short v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: short8_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s0, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s1, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s1 -; GCN-O0-NEXT: s_mov_b32 s4, 2 -; GCN-O0-NEXT: s_cselect_b32 s1, s4, s1 -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 3 -; GCN-O0-NEXT: s_cselect_b32 s1, s4, s1 -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 4 -; GCN-O0-NEXT: s_cselect_b32 s1, s4, s1 -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 5 -; GCN-O0-NEXT: s_cselect_b32 s1, s4, s1 -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 6 -; GCN-O0-NEXT: s_cselect_b32 s1, s4, s1 -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 7 -; GCN-O0-NEXT: s_cselect_b32 s1, s4, s1 -; GCN-O0-NEXT: s_cmp_eq_u32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s0, 8 -; GCN-O0-NEXT: s_cselect_b32 s0, s0, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_short v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <8 x i16> , i32 %sel store i16 %ext, ptr addrspace(1) %out @@ -662,42 +274,6 @@ define amdgpu_kernel void @float8_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float8_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x41000000 -; GCN-O0-NEXT: s_mov_b32 s4, 0x40e00000 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40c00000 -; GCN-O0-NEXT: s_mov_b32 s6, 0x40a00000 -; GCN-O0-NEXT: s_mov_b32 s7, 4.0 -; GCN-O0-NEXT: s_mov_b32 s8, 0x40400000 -; GCN-O0-NEXT: s_mov_b32 s9, 2.0 -; GCN-O0-NEXT: s_mov_b32 s10, 1.0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s3 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v11 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v8 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <8 x float> , i32 %sel store float %ext, ptr addrspace(1) %out @@ -749,101 +325,6 @@ define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s17 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[15:16] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double8_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40200000 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s20, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x401c0000 -; GCN-O0-NEXT: s_mov_b32 s21, s5 -; GCN-O0-NEXT: s_mov_b32 s22, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40180000 -; GCN-O0-NEXT: s_mov_b32 s23, s5 -; GCN-O0-NEXT: s_mov_b32 s24, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40140000 -; GCN-O0-NEXT: s_mov_b32 s25, s5 -; GCN-O0-NEXT: s_mov_b32 s26, s4 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 4.0 -; GCN-O0-NEXT: s_mov_b32 s27, s5 -; GCN-O0-NEXT: s_mov_b32 s28, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40080000 -; GCN-O0-NEXT: s_mov_b32 s29, s5 -; GCN-O0-NEXT: s_mov_b32 s30, s4 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 2.0 -; GCN-O0-NEXT: s_mov_b32 s31, s5 -; GCN-O0-NEXT: s_mov_b32 s33, s4 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s34, s5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 -; GCN-O0-NEXT: s_mov_b32 s5, s34 -; GCN-O0-NEXT: s_mov_b32 s6, s33 -; GCN-O0-NEXT: s_mov_b32 s7, s31 -; GCN-O0-NEXT: s_mov_b32 s8, s30 -; GCN-O0-NEXT: s_mov_b32 s9, s29 -; GCN-O0-NEXT: s_mov_b32 s10, s28 -; GCN-O0-NEXT: s_mov_b32 s11, s27 -; GCN-O0-NEXT: s_mov_b32 s12, s26 -; GCN-O0-NEXT: s_mov_b32 s13, s25 -; GCN-O0-NEXT: s_mov_b32 s14, s24 -; GCN-O0-NEXT: s_mov_b32 s15, s23 -; GCN-O0-NEXT: s_mov_b32 s16, s22 -; GCN-O0-NEXT: s_mov_b32 s17, s21 -; GCN-O0-NEXT: s_mov_b32 s18, s20 -; GCN-O0-NEXT: s_mov_b32 s19, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s19 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s19 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <8 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -893,101 +374,6 @@ define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s15 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[15:16] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double7_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x401c0000 -; GCN-O0-NEXT: s_mov_b32 s21, s5 -; GCN-O0-NEXT: s_mov_b32 s22, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40180000 -; GCN-O0-NEXT: s_mov_b32 s23, s5 -; GCN-O0-NEXT: s_mov_b32 s24, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40140000 -; GCN-O0-NEXT: s_mov_b32 s25, s5 -; GCN-O0-NEXT: s_mov_b32 s26, s4 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 4.0 -; GCN-O0-NEXT: s_mov_b32 s27, s5 -; GCN-O0-NEXT: s_mov_b32 s28, s4 -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40080000 -; GCN-O0-NEXT: s_mov_b32 s29, s5 -; GCN-O0-NEXT: s_mov_b32 s30, s4 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 2.0 -; GCN-O0-NEXT: s_mov_b32 s31, s5 -; GCN-O0-NEXT: s_mov_b32 s33, s4 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s34, s5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 -; GCN-O0-NEXT: ; implicit-def: $sgpr20 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 -; GCN-O0-NEXT: s_mov_b32 s5, s34 -; GCN-O0-NEXT: s_mov_b32 s6, s33 -; GCN-O0-NEXT: s_mov_b32 s7, s31 -; GCN-O0-NEXT: s_mov_b32 s8, s30 -; GCN-O0-NEXT: s_mov_b32 s9, s29 -; GCN-O0-NEXT: s_mov_b32 s10, s28 -; GCN-O0-NEXT: s_mov_b32 s11, s27 -; GCN-O0-NEXT: s_mov_b32 s12, s26 -; GCN-O0-NEXT: s_mov_b32 s13, s25 -; GCN-O0-NEXT: s_mov_b32 s14, s24 -; GCN-O0-NEXT: s_mov_b32 s15, s23 -; GCN-O0-NEXT: s_mov_b32 s16, s22 -; GCN-O0-NEXT: s_mov_b32 s17, s21 -; GCN-O0-NEXT: s_mov_b32 s18, s20 -; GCN-O0-NEXT: s_mov_b32 s19, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s19 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s19 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <7 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -1022,66 +408,6 @@ define amdgpu_kernel void @float16_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float16_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x41800000 -; GCN-O0-NEXT: s_mov_b32 s4, 0x41700000 -; GCN-O0-NEXT: s_mov_b32 s5, 0x41600000 -; GCN-O0-NEXT: s_mov_b32 s6, 0x41500000 -; GCN-O0-NEXT: s_mov_b32 s7, 0x41400000 -; GCN-O0-NEXT: s_mov_b32 s8, 0x41300000 -; GCN-O0-NEXT: s_mov_b32 s9, 0x41200000 -; GCN-O0-NEXT: s_mov_b32 s10, 0x41100000 -; GCN-O0-NEXT: s_mov_b32 s11, 0x41000000 -; GCN-O0-NEXT: s_mov_b32 s12, 0x40e00000 -; GCN-O0-NEXT: s_mov_b32 s13, 0x40c00000 -; GCN-O0-NEXT: s_mov_b32 s14, 0x40a00000 -; GCN-O0-NEXT: s_mov_b32 s15, 4.0 -; GCN-O0-NEXT: s_mov_b32 s16, 0x40400000 -; GCN-O0-NEXT: s_mov_b32 s17, 2.0 -; GCN-O0-NEXT: s_mov_b32 s18, 1.0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s3 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v27 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v23 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v19 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v16 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <16 x float> , i32 %sel store float %ext, ptr addrspace(1) %out @@ -1163,181 +489,6 @@ define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[31:32] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double15_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s6, 0 -; GCN-O0-NEXT: s_mov_b32 s7, 0x402e0000 -; GCN-O0-NEXT: s_mov_b32 s5, s7 -; GCN-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 -; GCN-O0-NEXT: s_mov_b32 s8, 0 -; GCN-O0-NEXT: s_mov_b32 s9, 0x402c0000 -; GCN-O0-NEXT: s_mov_b32 s7, s9 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s10, 0 -; GCN-O0-NEXT: s_mov_b32 s11, 0x402a0000 -; GCN-O0-NEXT: s_mov_b32 s9, s11 -; GCN-O0-NEXT: ; kill: def $sgpr10 killed $sgpr10 killed $sgpr10_sgpr11 -; GCN-O0-NEXT: s_mov_b32 s12, 0 -; GCN-O0-NEXT: s_mov_b32 s13, 0x40280000 -; GCN-O0-NEXT: s_mov_b32 s11, s13 -; GCN-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 -; GCN-O0-NEXT: s_mov_b32 s14, 0 -; GCN-O0-NEXT: s_mov_b32 s15, 0x40260000 -; GCN-O0-NEXT: s_mov_b32 s13, s15 -; GCN-O0-NEXT: ; kill: def $sgpr14 killed $sgpr14 killed $sgpr14_sgpr15 -; GCN-O0-NEXT: s_mov_b32 s16, 0 -; GCN-O0-NEXT: s_mov_b32 s17, 0x40240000 -; GCN-O0-NEXT: s_mov_b32 s15, s17 -; GCN-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 killed $sgpr16_sgpr17 -; GCN-O0-NEXT: s_mov_b32 s18, 0 -; GCN-O0-NEXT: s_mov_b32 s19, 0x40220000 -; GCN-O0-NEXT: s_mov_b32 s17, s19 -; GCN-O0-NEXT: ; kill: def $sgpr18 killed $sgpr18 killed $sgpr18_sgpr19 -; GCN-O0-NEXT: s_mov_b32 s20, 0 -; GCN-O0-NEXT: s_mov_b32 s21, 0x40200000 -; GCN-O0-NEXT: s_mov_b32 s19, s21 -; GCN-O0-NEXT: ; kill: def $sgpr20 killed $sgpr20 killed $sgpr20_sgpr21 -; GCN-O0-NEXT: s_mov_b32 s22, 0 -; GCN-O0-NEXT: s_mov_b32 s23, 0x401c0000 -; GCN-O0-NEXT: s_mov_b32 s21, s23 -; GCN-O0-NEXT: ; kill: def $sgpr22 killed $sgpr22 killed $sgpr22_sgpr23 -; GCN-O0-NEXT: s_mov_b32 s24, 0 -; GCN-O0-NEXT: s_mov_b32 s25, 0x40180000 -; GCN-O0-NEXT: s_mov_b32 s23, s25 -; GCN-O0-NEXT: ; kill: def $sgpr24 killed $sgpr24 killed $sgpr24_sgpr25 -; GCN-O0-NEXT: s_mov_b32 s26, 0 -; GCN-O0-NEXT: s_mov_b32 s27, 0x40140000 -; GCN-O0-NEXT: s_mov_b32 s25, s27 -; GCN-O0-NEXT: ; kill: def $sgpr26 killed $sgpr26 killed $sgpr26_sgpr27 -; GCN-O0-NEXT: s_mov_b64 s[28:29], 4.0 -; GCN-O0-NEXT: s_mov_b32 s27, s29 -; GCN-O0-NEXT: ; kill: def $sgpr28 killed $sgpr28 killed $sgpr28_sgpr29 -; GCN-O0-NEXT: s_mov_b32 s30, 0 -; GCN-O0-NEXT: s_mov_b32 s31, 0x40080000 -; GCN-O0-NEXT: s_mov_b32 s29, s31 -; GCN-O0-NEXT: ; kill: def $sgpr30 killed $sgpr30 killed $sgpr30_sgpr31 -; GCN-O0-NEXT: s_mov_b64 s[34:35], 2.0 -; GCN-O0-NEXT: s_mov_b32 s31, s35 -; GCN-O0-NEXT: s_mov_b32 s33, s34 -; GCN-O0-NEXT: s_mov_b64 s[36:37], 1.0 -; GCN-O0-NEXT: s_mov_b32 s34, s37 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 killed $sgpr36_sgpr37 -; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr3 -; GCN-O0-NEXT: ; implicit-def: $sgpr35 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 -; GCN-O0-NEXT: s_mov_b32 s37, s34 -; GCN-O0-NEXT: s_mov_b32 s38, s33 -; GCN-O0-NEXT: s_mov_b32 s39, s31 -; GCN-O0-NEXT: s_mov_b32 s40, s30 -; GCN-O0-NEXT: s_mov_b32 s41, s29 -; GCN-O0-NEXT: s_mov_b32 s42, s28 -; GCN-O0-NEXT: s_mov_b32 s43, s27 -; GCN-O0-NEXT: s_mov_b32 s44, s26 -; GCN-O0-NEXT: s_mov_b32 s45, s25 -; GCN-O0-NEXT: s_mov_b32 s46, s24 -; GCN-O0-NEXT: s_mov_b32 s47, s23 -; GCN-O0-NEXT: s_mov_b32 s48, s22 -; GCN-O0-NEXT: s_mov_b32 s49, s21 -; GCN-O0-NEXT: s_mov_b32 s50, s20 -; GCN-O0-NEXT: s_mov_b32 s51, s19 -; GCN-O0-NEXT: s_mov_b32 s52, s18 -; GCN-O0-NEXT: s_mov_b32 s53, s17 -; GCN-O0-NEXT: s_mov_b32 s54, s16 -; GCN-O0-NEXT: s_mov_b32 s55, s15 -; GCN-O0-NEXT: s_mov_b32 s56, s14 -; GCN-O0-NEXT: s_mov_b32 s57, s13 -; GCN-O0-NEXT: s_mov_b32 s58, s12 -; GCN-O0-NEXT: s_mov_b32 s59, s11 -; GCN-O0-NEXT: s_mov_b32 s60, s10 -; GCN-O0-NEXT: s_mov_b32 s61, s9 -; GCN-O0-NEXT: s_mov_b32 s62, s8 -; GCN-O0-NEXT: s_mov_b32 s63, s7 -; GCN-O0-NEXT: s_mov_b32 s64, s6 -; GCN-O0-NEXT: s_mov_b32 s65, s5 -; GCN-O0-NEXT: s_mov_b32 s66, s4 -; GCN-O0-NEXT: s_mov_b32 s67, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s37 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s38 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s39 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s46 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s47 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s48 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s49 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s50 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s51 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s52 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s54 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s55 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s56 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s57 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s58 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s59 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s60 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s61 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s62 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s63 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s64 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s65 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s66 -; GCN-O0-NEXT: v_mov_b32_e32 v31, s67 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s37 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s38 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s39 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s46 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s47 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s48 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s49 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s50 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s51 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s52 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s53 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s54 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s55 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s56 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s57 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s58 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s59 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s60 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s61 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s62 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s63 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s64 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s65 -; GCN-O0-NEXT: v_mov_b32_e32 v31, s66 -; GCN-O0-NEXT: v_mov_b32_e32 v32, s67 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <15 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -1421,181 +572,6 @@ define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[31:32] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double16_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s4, 0 -; GCN-O0-NEXT: s_mov_b32 s5, 0x40300000 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 -; GCN-O0-NEXT: s_mov_b32 s6, 0 -; GCN-O0-NEXT: s_mov_b32 s7, 0x402e0000 -; GCN-O0-NEXT: s_mov_b32 s5, s7 -; GCN-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 -; GCN-O0-NEXT: s_mov_b32 s8, 0 -; GCN-O0-NEXT: s_mov_b32 s9, 0x402c0000 -; GCN-O0-NEXT: s_mov_b32 s7, s9 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s10, 0 -; GCN-O0-NEXT: s_mov_b32 s11, 0x402a0000 -; GCN-O0-NEXT: s_mov_b32 s9, s11 -; GCN-O0-NEXT: ; kill: def $sgpr10 killed $sgpr10 killed $sgpr10_sgpr11 -; GCN-O0-NEXT: s_mov_b32 s12, 0 -; GCN-O0-NEXT: s_mov_b32 s13, 0x40280000 -; GCN-O0-NEXT: s_mov_b32 s11, s13 -; GCN-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 -; GCN-O0-NEXT: s_mov_b32 s14, 0 -; GCN-O0-NEXT: s_mov_b32 s15, 0x40260000 -; GCN-O0-NEXT: s_mov_b32 s13, s15 -; GCN-O0-NEXT: ; kill: def $sgpr14 killed $sgpr14 killed $sgpr14_sgpr15 -; GCN-O0-NEXT: s_mov_b32 s16, 0 -; GCN-O0-NEXT: s_mov_b32 s17, 0x40240000 -; GCN-O0-NEXT: s_mov_b32 s15, s17 -; GCN-O0-NEXT: ; kill: def $sgpr16 killed $sgpr16 killed $sgpr16_sgpr17 -; GCN-O0-NEXT: s_mov_b32 s18, 0 -; GCN-O0-NEXT: s_mov_b32 s19, 0x40220000 -; GCN-O0-NEXT: s_mov_b32 s17, s19 -; GCN-O0-NEXT: ; kill: def $sgpr18 killed $sgpr18 killed $sgpr18_sgpr19 -; GCN-O0-NEXT: s_mov_b32 s20, 0 -; GCN-O0-NEXT: s_mov_b32 s21, 0x40200000 -; GCN-O0-NEXT: s_mov_b32 s19, s21 -; GCN-O0-NEXT: ; kill: def $sgpr20 killed $sgpr20 killed $sgpr20_sgpr21 -; GCN-O0-NEXT: s_mov_b32 s22, 0 -; GCN-O0-NEXT: s_mov_b32 s23, 0x401c0000 -; GCN-O0-NEXT: s_mov_b32 s21, s23 -; GCN-O0-NEXT: ; kill: def $sgpr22 killed $sgpr22 killed $sgpr22_sgpr23 -; GCN-O0-NEXT: s_mov_b32 s24, 0 -; GCN-O0-NEXT: s_mov_b32 s25, 0x40180000 -; GCN-O0-NEXT: s_mov_b32 s23, s25 -; GCN-O0-NEXT: ; kill: def $sgpr24 killed $sgpr24 killed $sgpr24_sgpr25 -; GCN-O0-NEXT: s_mov_b32 s26, 0 -; GCN-O0-NEXT: s_mov_b32 s27, 0x40140000 -; GCN-O0-NEXT: s_mov_b32 s25, s27 -; GCN-O0-NEXT: ; kill: def $sgpr26 killed $sgpr26 killed $sgpr26_sgpr27 -; GCN-O0-NEXT: s_mov_b64 s[28:29], 4.0 -; GCN-O0-NEXT: s_mov_b32 s27, s29 -; GCN-O0-NEXT: ; kill: def $sgpr28 killed $sgpr28 killed $sgpr28_sgpr29 -; GCN-O0-NEXT: s_mov_b32 s30, 0 -; GCN-O0-NEXT: s_mov_b32 s31, 0x40080000 -; GCN-O0-NEXT: s_mov_b32 s29, s31 -; GCN-O0-NEXT: ; kill: def $sgpr30 killed $sgpr30 killed $sgpr30_sgpr31 -; GCN-O0-NEXT: s_mov_b64 s[34:35], 2.0 -; GCN-O0-NEXT: s_mov_b32 s31, s35 -; GCN-O0-NEXT: s_mov_b32 s33, s34 -; GCN-O0-NEXT: s_mov_b64 s[36:37], 1.0 -; GCN-O0-NEXT: s_mov_b32 s34, s37 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 killed $sgpr36_sgpr37 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 -; GCN-O0-NEXT: s_mov_b32 s37, s34 -; GCN-O0-NEXT: s_mov_b32 s38, s33 -; GCN-O0-NEXT: s_mov_b32 s39, s31 -; GCN-O0-NEXT: s_mov_b32 s40, s30 -; GCN-O0-NEXT: s_mov_b32 s41, s29 -; GCN-O0-NEXT: s_mov_b32 s42, s28 -; GCN-O0-NEXT: s_mov_b32 s43, s27 -; GCN-O0-NEXT: s_mov_b32 s44, s26 -; GCN-O0-NEXT: s_mov_b32 s45, s25 -; GCN-O0-NEXT: s_mov_b32 s46, s24 -; GCN-O0-NEXT: s_mov_b32 s47, s23 -; GCN-O0-NEXT: s_mov_b32 s48, s22 -; GCN-O0-NEXT: s_mov_b32 s49, s21 -; GCN-O0-NEXT: s_mov_b32 s50, s20 -; GCN-O0-NEXT: s_mov_b32 s51, s19 -; GCN-O0-NEXT: s_mov_b32 s52, s18 -; GCN-O0-NEXT: s_mov_b32 s53, s17 -; GCN-O0-NEXT: s_mov_b32 s54, s16 -; GCN-O0-NEXT: s_mov_b32 s55, s15 -; GCN-O0-NEXT: s_mov_b32 s56, s14 -; GCN-O0-NEXT: s_mov_b32 s57, s13 -; GCN-O0-NEXT: s_mov_b32 s58, s12 -; GCN-O0-NEXT: s_mov_b32 s59, s11 -; GCN-O0-NEXT: s_mov_b32 s60, s10 -; GCN-O0-NEXT: s_mov_b32 s61, s9 -; GCN-O0-NEXT: s_mov_b32 s62, s8 -; GCN-O0-NEXT: s_mov_b32 s63, s7 -; GCN-O0-NEXT: s_mov_b32 s64, s6 -; GCN-O0-NEXT: s_mov_b32 s65, s5 -; GCN-O0-NEXT: s_mov_b32 s66, s4 -; GCN-O0-NEXT: s_mov_b32 s67, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s37 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s38 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s39 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s46 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s47 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s48 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s49 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s50 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s51 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s52 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s54 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s55 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s56 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s57 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s58 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s59 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s60 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s61 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s62 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s63 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s64 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s65 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s66 -; GCN-O0-NEXT: v_mov_b32_e32 v31, s67 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s37 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s38 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s39 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s46 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s47 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s48 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s49 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s50 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s51 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s52 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s53 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s54 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s55 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s56 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s57 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s58 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s59 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s60 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s61 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s62 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s63 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s64 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s65 -; GCN-O0-NEXT: v_mov_b32_e32 v31, s66 -; GCN-O0-NEXT: v_mov_b32_e32 v32, s67 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v1 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <16 x double> , i32 %sel store double %ext, ptr addrspace(1) %out @@ -1646,114 +622,6 @@ define amdgpu_kernel void @float32_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float32_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x42000000 -; GCN-O0-NEXT: s_mov_b32 s4, 0x41f80000 -; GCN-O0-NEXT: s_mov_b32 s5, 0x41f00000 -; GCN-O0-NEXT: s_mov_b32 s6, 0x41e80000 -; GCN-O0-NEXT: s_mov_b32 s7, 0x41e00000 -; GCN-O0-NEXT: s_mov_b32 s8, 0x41d80000 -; GCN-O0-NEXT: s_mov_b32 s9, 0x41d00000 -; GCN-O0-NEXT: s_mov_b32 s10, 0x41c80000 -; GCN-O0-NEXT: s_mov_b32 s11, 0x41c00000 -; GCN-O0-NEXT: s_mov_b32 s12, 0x41b80000 -; GCN-O0-NEXT: s_mov_b32 s13, 0x41b00000 -; GCN-O0-NEXT: s_mov_b32 s14, 0x41a80000 -; GCN-O0-NEXT: s_mov_b32 s15, 0x41a00000 -; GCN-O0-NEXT: s_mov_b32 s16, 0x41980000 -; GCN-O0-NEXT: s_mov_b32 s17, 0x41900000 -; GCN-O0-NEXT: s_mov_b32 s18, 0x41880000 -; GCN-O0-NEXT: s_mov_b32 s19, 0x41800000 -; GCN-O0-NEXT: s_mov_b32 s20, 0x41700000 -; GCN-O0-NEXT: s_mov_b32 s21, 0x41600000 -; GCN-O0-NEXT: s_mov_b32 s22, 0x41500000 -; GCN-O0-NEXT: s_mov_b32 s23, 0x41400000 -; GCN-O0-NEXT: s_mov_b32 s24, 0x41300000 -; GCN-O0-NEXT: s_mov_b32 s25, 0x41200000 -; GCN-O0-NEXT: s_mov_b32 s26, 0x41100000 -; GCN-O0-NEXT: s_mov_b32 s27, 0x41000000 -; GCN-O0-NEXT: s_mov_b32 s28, 0x40e00000 -; GCN-O0-NEXT: s_mov_b32 s29, 0x40c00000 -; GCN-O0-NEXT: s_mov_b32 s30, 0x40a00000 -; GCN-O0-NEXT: s_mov_b32 s31, 4.0 -; GCN-O0-NEXT: s_mov_b32 s33, 0x40400000 -; GCN-O0-NEXT: s_mov_b32 s34, 2.0 -; GCN-O0-NEXT: s_mov_b32 s35, 1.0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s35 -; GCN-O0-NEXT: v_mov_b32_e32 v62, s34 -; GCN-O0-NEXT: v_mov_b32_e32 v61, s33 -; GCN-O0-NEXT: v_mov_b32_e32 v60, s31 -; GCN-O0-NEXT: v_mov_b32_e32 v59, s30 -; GCN-O0-NEXT: v_mov_b32_e32 v58, s29 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s28 -; GCN-O0-NEXT: v_mov_b32_e32 v56, s27 -; GCN-O0-NEXT: v_mov_b32_e32 v55, s26 -; GCN-O0-NEXT: v_mov_b32_e32 v54, s25 -; GCN-O0-NEXT: v_mov_b32_e32 v53, s24 -; GCN-O0-NEXT: v_mov_b32_e32 v52, s23 -; GCN-O0-NEXT: v_mov_b32_e32 v51, s22 -; GCN-O0-NEXT: v_mov_b32_e32 v50, s21 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s20 -; GCN-O0-NEXT: v_mov_b32_e32 v48, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v47, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v46, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v45, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v44, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v43, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v42, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v40, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v39, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v38, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v37, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v36, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v35, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v34, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v33, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v32, s3 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v61 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v60 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v59 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v58 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v56 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v55 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v51 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v50 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v48 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v38 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v37 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v35 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v32 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movrels_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <32 x float> , i32 %sel store float %ext, ptr addrspace(1) %out @@ -1775,25 +643,6 @@ define amdgpu_kernel void @byte8_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_byte v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: byte8_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s0, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s1, 3 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s4, s0, s1 -; GCN-O0-NEXT: s_mov_b32 s5, 0x8070605 -; GCN-O0-NEXT: s_mov_b32 s0, 0x4030201 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <8 x i8> , i32 %sel store i8 %ext, ptr addrspace(1) %out @@ -1841,61 +690,6 @@ define amdgpu_kernel void @byte16_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_byte v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: byte16_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s14, -1 -; GCN-O0-NEXT: s_mov_b32 s15, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s12, s12, s11 -; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 15 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_and_b32 s3, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s2, 0 -; GCN-O0-NEXT: s_or_b32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 16 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:15 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 15 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:14 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 14 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:13 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 13 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:12 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 12 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:11 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 11 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 10 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 9 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 8 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:7 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 7 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 6 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 5 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 4 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 3 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: buffer_load_ubyte v2, v0, s[12:15], 0 offen -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <16 x i8> , i32 %sel store i8 %ext, ptr addrspace(1) %out @@ -1916,23 +710,6 @@ define amdgpu_kernel void @bit4_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: bit4_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s0, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s1, 3 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s1, s0, s1 -; GCN-O0-NEXT: s_mov_b32 s0, 0x1000100 -; GCN-O0-NEXT: s_lshr_b32 s0, s0, s1 -; GCN-O0-NEXT: s_mov_b32 s1, 1 -; GCN-O0-NEXT: s_and_b32 s0, s0, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <4 x i1> , i32 %sel %zext = zext i1 %ext to i32 @@ -2208,161 +985,6 @@ define amdgpu_kernel void @bit128_extelt(ptr addrspace(1) %out, i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: bit128_extelt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s14, -1 -; GCN-O0-NEXT: s_mov_b32 s15, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s12, s12, s11 -; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: s_mov_b32 s3, 0x7f -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_and_b32 s3, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s2, 0 -; GCN-O0-NEXT: s_add_i32 s2, s2, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:127 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:126 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:125 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:124 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:123 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:122 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:121 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:120 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:119 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:118 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:117 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:116 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:115 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:114 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:113 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:112 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:111 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:110 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:109 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:108 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:107 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:106 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:105 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:104 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:103 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:102 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:101 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:100 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:99 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:98 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:97 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:96 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:95 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:94 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:93 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:92 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:91 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:90 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:89 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:88 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:87 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:86 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:85 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:84 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:83 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:82 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:81 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:80 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:79 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:78 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:77 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:76 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:75 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:74 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:73 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:72 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:71 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:70 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:69 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:68 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:67 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:66 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:65 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:64 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:63 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:62 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:61 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:60 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:59 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:58 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:57 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:56 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:55 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:54 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:53 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:52 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:51 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:50 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:49 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:48 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:47 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:46 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:45 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:44 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:43 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:42 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:41 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:40 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:39 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:38 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:37 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:36 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:35 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:34 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:33 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:32 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:31 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:30 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:29 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:28 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:27 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:26 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:25 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:24 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:23 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:22 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:21 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:20 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:19 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:18 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:17 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:16 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:15 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:14 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:13 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:12 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:11 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:10 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:9 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:8 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:7 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:6 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:5 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:4 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:3 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:2 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:1 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: buffer_load_ubyte v0, v0, s[12:15], 0 offen -; GCN-O0-NEXT: s_mov_b32 s2, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v2, v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %ext = extractelement <128 x i1> , i32 %sel %zext = zext i1 %ext to i32 @@ -2466,253 +1088,6 @@ define float @float32_extelt_vec(i32 %sel) { ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 31, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: float32_extelt_vec: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v62, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, 0x42000000 -; GCN-O0-NEXT: s_mov_b32 s5, 0x41f80000 -; GCN-O0-NEXT: s_mov_b32 s6, 0x41f00000 -; GCN-O0-NEXT: s_mov_b32 s7, 0x41e80000 -; GCN-O0-NEXT: s_mov_b32 s8, 0x41e00000 -; GCN-O0-NEXT: s_mov_b32 s9, 0x41d80000 -; GCN-O0-NEXT: s_mov_b32 s10, 0x41d00000 -; GCN-O0-NEXT: s_mov_b32 s11, 0x41c80000 -; GCN-O0-NEXT: s_mov_b32 s12, 0x41c00000 -; GCN-O0-NEXT: s_mov_b32 s13, 0x41b80000 -; GCN-O0-NEXT: s_mov_b32 s14, 0x41b00000 -; GCN-O0-NEXT: s_mov_b32 s15, 0x41a80000 -; GCN-O0-NEXT: s_mov_b32 s16, 0x41a00000 -; GCN-O0-NEXT: s_mov_b32 s17, 0x41980000 -; GCN-O0-NEXT: s_mov_b32 s18, 0x41900000 -; GCN-O0-NEXT: s_mov_b32 s19, 0x41880000 -; GCN-O0-NEXT: s_mov_b32 s20, 0x41800000 -; GCN-O0-NEXT: s_mov_b32 s21, 0x41700000 -; GCN-O0-NEXT: s_mov_b32 s22, 0x41600000 -; GCN-O0-NEXT: s_mov_b32 s23, 0x41500000 -; GCN-O0-NEXT: s_mov_b32 s24, 0x41400000 -; GCN-O0-NEXT: s_mov_b32 s25, 0x41300000 -; GCN-O0-NEXT: s_mov_b32 s26, 0x41200000 -; GCN-O0-NEXT: s_mov_b32 s27, 0x41100000 -; GCN-O0-NEXT: s_mov_b32 s28, 0x41000000 -; GCN-O0-NEXT: s_mov_b32 s29, 0x40e00000 -; GCN-O0-NEXT: s_mov_b32 s40, 0x40c00000 -; GCN-O0-NEXT: s_mov_b32 s41, 0x40a00000 -; GCN-O0-NEXT: s_mov_b32 s42, 4.0 -; GCN-O0-NEXT: s_mov_b32 s43, 0x40400000 -; GCN-O0-NEXT: s_mov_b32 s44, 2.0 -; GCN-O0-NEXT: s_mov_b32 s45, 1.0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v62, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v61, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v60, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v59, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v58, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s29 -; GCN-O0-NEXT: v_mov_b32_e32 v56, s28 -; GCN-O0-NEXT: v_mov_b32_e32 v47, s27 -; GCN-O0-NEXT: v_mov_b32_e32 v46, s26 -; GCN-O0-NEXT: v_mov_b32_e32 v45, s25 -; GCN-O0-NEXT: v_mov_b32_e32 v44, s24 -; GCN-O0-NEXT: v_mov_b32_e32 v43, s23 -; GCN-O0-NEXT: v_mov_b32_e32 v42, s22 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s21 -; GCN-O0-NEXT: v_mov_b32_e32 v40, s20 -; GCN-O0-NEXT: v_mov_b32_e32 v55, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v54, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v53, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v52, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v51, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v50, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v48, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v39, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v38, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v37, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v36, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v35, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v34, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v33, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v32, s4 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v61 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v60 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v59 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v58 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v56 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v55 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v51 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v50 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v48 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v38 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v37 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v35 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v32 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr63 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v63, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v63, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GCN-O0-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[46:47] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB20_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GCN-O0-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[46:47] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v63, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v63, 3 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v32 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v32 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v63, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v63, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GCN-O0-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[46:47] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB20_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GCN-O0-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[46:47] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v63, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v63, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v62, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <32 x float> , i32 %sel ret float %ext @@ -2788,1692 +1163,7 @@ define double @double16_extelt_vec(i32 %sel) { ; GCN-NEXT: v_mov_b32_e32 v1, 0x40301999 ; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: double16_extelt_vec: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v34, s36, 0 -; GCN-O0-NEXT: v_writelane_b32 v34, s37, 1 -; GCN-O0-NEXT: v_writelane_b32 v34, s38, 2 -; GCN-O0-NEXT: v_writelane_b32 v34, s39, 3 -; GCN-O0-NEXT: v_writelane_b32 v34, s48, 4 -; GCN-O0-NEXT: v_writelane_b32 v34, s49, 5 -; GCN-O0-NEXT: v_writelane_b32 v34, s50, 6 -; GCN-O0-NEXT: v_writelane_b32 v34, s51, 7 -; GCN-O0-NEXT: v_writelane_b32 v34, s52, 8 -; GCN-O0-NEXT: v_writelane_b32 v34, s53, 9 -; GCN-O0-NEXT: v_writelane_b32 v34, s54, 10 -; GCN-O0-NEXT: v_writelane_b32 v34, s55, 11 -; GCN-O0-NEXT: v_writelane_b32 v34, s64, 12 -; GCN-O0-NEXT: v_writelane_b32 v34, s65, 13 -; GCN-O0-NEXT: v_writelane_b32 v34, s66, 14 -; GCN-O0-NEXT: v_writelane_b32 v34, s67, 15 -; GCN-O0-NEXT: s_mov_b32 s4, 0x40301999 -; GCN-O0-NEXT: s_mov_b32 s40, 0x9999999a -; GCN-O0-NEXT: s_mov_b32 s6, s40 -; GCN-O0-NEXT: s_mov_b32 s7, s4 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s6, 0x402e3333 -; GCN-O0-NEXT: s_mov_b32 s22, 0x33333333 -; GCN-O0-NEXT: ; implicit-def: $vgpr35 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v35, s22, 0 -; GCN-O0-NEXT: s_mov_b32 s8, s22 -; GCN-O0-NEXT: s_mov_b32 s9, s6 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s8, 0x402c3333 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: s_mov_b32 s11, s8 -; GCN-O0-NEXT: s_mov_b32 s8, s11 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: s_mov_b32 s10, 0x402a3333 -; GCN-O0-NEXT: s_mov_b32 s12, s22 -; GCN-O0-NEXT: s_mov_b32 s13, s10 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s12, 0x40283333 -; GCN-O0-NEXT: s_mov_b32 s14, s22 -; GCN-O0-NEXT: s_mov_b32 s15, s12 -; GCN-O0-NEXT: s_mov_b32 s12, s15 -; GCN-O0-NEXT: s_mov_b32 s13, s14 -; GCN-O0-NEXT: s_mov_b32 s14, 0x40263333 -; GCN-O0-NEXT: s_mov_b32 s16, s22 -; GCN-O0-NEXT: s_mov_b32 s17, s14 -; GCN-O0-NEXT: s_mov_b32 s14, s17 -; GCN-O0-NEXT: s_mov_b32 s15, s16 -; GCN-O0-NEXT: s_mov_b32 s16, 0x40243333 -; GCN-O0-NEXT: s_mov_b32 s18, s22 -; GCN-O0-NEXT: s_mov_b32 s19, s16 -; GCN-O0-NEXT: s_mov_b32 s16, s19 -; GCN-O0-NEXT: s_mov_b32 s17, s18 -; GCN-O0-NEXT: s_mov_b32 s18, 0x40223333 -; GCN-O0-NEXT: s_mov_b32 s20, s22 -; GCN-O0-NEXT: s_mov_b32 s21, s18 -; GCN-O0-NEXT: s_mov_b32 s18, s21 -; GCN-O0-NEXT: s_mov_b32 s19, s20 -; GCN-O0-NEXT: s_mov_b32 s20, 0x40203333 -; GCN-O0-NEXT: ; kill: def $sgpr22 killed $sgpr22 def $sgpr22_sgpr23 -; GCN-O0-NEXT: s_mov_b32 s23, s20 -; GCN-O0-NEXT: s_mov_b32 s20, s23 -; GCN-O0-NEXT: s_mov_b32 s21, s22 -; GCN-O0-NEXT: s_mov_b32 s22, 0x401c6666 -; GCN-O0-NEXT: s_mov_b32 s42, 0x66666666 -; GCN-O0-NEXT: s_mov_b32 s24, s42 -; GCN-O0-NEXT: s_mov_b32 s25, s22 -; GCN-O0-NEXT: s_mov_b32 s22, s25 -; GCN-O0-NEXT: s_mov_b32 s23, s24 -; GCN-O0-NEXT: s_mov_b32 s24, 0x40186666 -; GCN-O0-NEXT: s_mov_b32 s26, s42 -; GCN-O0-NEXT: s_mov_b32 s27, s24 -; GCN-O0-NEXT: s_mov_b32 s24, s27 -; GCN-O0-NEXT: s_mov_b32 s25, s26 -; GCN-O0-NEXT: s_mov_b32 s26, 0x40146666 -; GCN-O0-NEXT: s_mov_b32 s28, s42 -; GCN-O0-NEXT: s_mov_b32 s29, s26 -; GCN-O0-NEXT: s_mov_b32 s26, s29 -; GCN-O0-NEXT: s_mov_b32 s27, s28 -; GCN-O0-NEXT: s_mov_b32 s28, 0x40106666 -; GCN-O0-NEXT: ; kill: def $sgpr42 killed $sgpr42 def $sgpr42_sgpr43 -; GCN-O0-NEXT: s_mov_b32 s43, s28 -; GCN-O0-NEXT: s_mov_b32 s28, s43 -; GCN-O0-NEXT: s_mov_b32 s29, s42 -; GCN-O0-NEXT: s_mov_b32 s41, 0x4008cccc -; GCN-O0-NEXT: s_mov_b32 s42, 0xcccccccd -; GCN-O0-NEXT: s_mov_b32 s44, s42 -; GCN-O0-NEXT: s_mov_b32 s45, s41 -; GCN-O0-NEXT: s_mov_b32 s72, s45 -; GCN-O0-NEXT: s_mov_b32 s73, s44 -; GCN-O0-NEXT: s_mov_b32 s41, 0x4000cccc -; GCN-O0-NEXT: ; kill: def $sgpr42 killed $sgpr42 def $sgpr42_sgpr43 -; GCN-O0-NEXT: s_mov_b32 s43, s41 -; GCN-O0-NEXT: s_mov_b32 s74, s43 -; GCN-O0-NEXT: s_mov_b32 s75, s42 -; GCN-O0-NEXT: s_mov_b32 s42, 0x3ff19999 -; GCN-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41 -; GCN-O0-NEXT: s_mov_b32 s41, s42 -; GCN-O0-NEXT: s_mov_b32 s76, s41 -; GCN-O0-NEXT: s_mov_b32 s36, s40 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 -; GCN-O0-NEXT: s_mov_b32 s37, s76 -; GCN-O0-NEXT: s_mov_b32 s38, s75 -; GCN-O0-NEXT: s_mov_b32 s39, s74 -; GCN-O0-NEXT: s_mov_b32 s40, s73 -; GCN-O0-NEXT: s_mov_b32 s41, s72 -; GCN-O0-NEXT: s_mov_b32 s42, s29 -; GCN-O0-NEXT: s_mov_b32 s43, s28 -; GCN-O0-NEXT: s_mov_b32 s44, s27 -; GCN-O0-NEXT: s_mov_b32 s45, s26 -; GCN-O0-NEXT: s_mov_b32 s46, s25 -; GCN-O0-NEXT: s_mov_b32 s47, s24 -; GCN-O0-NEXT: s_mov_b32 s48, s23 -; GCN-O0-NEXT: s_mov_b32 s49, s22 -; GCN-O0-NEXT: s_mov_b32 s50, s21 -; GCN-O0-NEXT: s_mov_b32 s51, s20 -; GCN-O0-NEXT: s_mov_b32 s52, s19 -; GCN-O0-NEXT: s_mov_b32 s53, s18 -; GCN-O0-NEXT: s_mov_b32 s54, s17 -; GCN-O0-NEXT: s_mov_b32 s55, s16 -; GCN-O0-NEXT: s_mov_b32 s56, s15 -; GCN-O0-NEXT: s_mov_b32 s57, s14 -; GCN-O0-NEXT: s_mov_b32 s58, s13 -; GCN-O0-NEXT: s_mov_b32 s59, s12 -; GCN-O0-NEXT: s_mov_b32 s60, s11 -; GCN-O0-NEXT: s_mov_b32 s61, s10 -; GCN-O0-NEXT: s_mov_b32 s62, s9 -; GCN-O0-NEXT: s_mov_b32 s63, s8 -; GCN-O0-NEXT: s_mov_b32 s64, s7 -; GCN-O0-NEXT: s_mov_b32 s65, s6 -; GCN-O0-NEXT: s_mov_b32 s66, s5 -; GCN-O0-NEXT: s_mov_b32 s67, s4 -; GCN-O0-NEXT: v_writelane_b32 v35, s36, 1 -; GCN-O0-NEXT: v_writelane_b32 v35, s37, 2 -; GCN-O0-NEXT: v_writelane_b32 v35, s38, 3 -; GCN-O0-NEXT: v_writelane_b32 v35, s39, 4 -; GCN-O0-NEXT: v_writelane_b32 v35, s40, 5 -; GCN-O0-NEXT: v_writelane_b32 v35, s41, 6 -; GCN-O0-NEXT: v_writelane_b32 v35, s42, 7 -; GCN-O0-NEXT: v_writelane_b32 v35, s43, 8 -; GCN-O0-NEXT: v_writelane_b32 v35, s44, 9 -; GCN-O0-NEXT: v_writelane_b32 v35, s45, 10 -; GCN-O0-NEXT: v_writelane_b32 v35, s46, 11 -; GCN-O0-NEXT: v_writelane_b32 v35, s47, 12 -; GCN-O0-NEXT: v_writelane_b32 v35, s48, 13 -; GCN-O0-NEXT: v_writelane_b32 v35, s49, 14 -; GCN-O0-NEXT: v_writelane_b32 v35, s50, 15 -; GCN-O0-NEXT: v_writelane_b32 v35, s51, 16 -; GCN-O0-NEXT: v_writelane_b32 v35, s52, 17 -; GCN-O0-NEXT: v_writelane_b32 v35, s53, 18 -; GCN-O0-NEXT: v_writelane_b32 v35, s54, 19 -; GCN-O0-NEXT: v_writelane_b32 v35, s55, 20 -; GCN-O0-NEXT: v_writelane_b32 v35, s56, 21 -; GCN-O0-NEXT: v_writelane_b32 v35, s57, 22 -; GCN-O0-NEXT: v_writelane_b32 v35, s58, 23 -; GCN-O0-NEXT: v_writelane_b32 v35, s59, 24 -; GCN-O0-NEXT: v_writelane_b32 v35, s60, 25 -; GCN-O0-NEXT: v_writelane_b32 v35, s61, 26 -; GCN-O0-NEXT: v_writelane_b32 v35, s62, 27 -; GCN-O0-NEXT: v_writelane_b32 v35, s63, 28 -; GCN-O0-NEXT: v_writelane_b32 v35, s64, 29 -; GCN-O0-NEXT: v_writelane_b32 v35, s65, 30 -; GCN-O0-NEXT: v_writelane_b32 v35, s66, 31 -; GCN-O0-NEXT: v_writelane_b32 v35, s67, 32 -; GCN-O0-NEXT: s_mov_b32 s4, 1 -; GCN-O0-NEXT: v_lshlrev_b32_e64 v0, s4, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_mov_b32_e32 v0, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s37 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s38 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s39 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s46 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s47 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s48 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s49 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s50 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s51 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s52 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s54 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s55 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s56 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s57 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s58 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s59 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s60 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s61 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s62 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s63 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s64 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s65 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s66 -; GCN-O0-NEXT: v_mov_b32_e32 v31, s67 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v35, s4, 33 -; GCN-O0-NEXT: v_writelane_b32 v35, s5, 34 -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_store_dword v35, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v35, 35 -; GCN-O0-NEXT: v_readlane_b32 s5, v35, 36 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v32 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v32 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v35, s6, 35 -; GCN-O0-NEXT: v_writelane_b32 v35, s7, 36 -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_store_dword v35, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB21_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v35, 33 -; GCN-O0-NEXT: v_readlane_b32 s5, v35, 34 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s36, v35, 1 -; GCN-O0-NEXT: v_readlane_b32 s37, v35, 2 -; GCN-O0-NEXT: v_readlane_b32 s38, v35, 3 -; GCN-O0-NEXT: v_readlane_b32 s39, v35, 4 -; GCN-O0-NEXT: v_readlane_b32 s40, v35, 5 -; GCN-O0-NEXT: v_readlane_b32 s41, v35, 6 -; GCN-O0-NEXT: v_readlane_b32 s42, v35, 7 -; GCN-O0-NEXT: v_readlane_b32 s43, v35, 8 -; GCN-O0-NEXT: v_readlane_b32 s44, v35, 9 -; GCN-O0-NEXT: v_readlane_b32 s45, v35, 10 -; GCN-O0-NEXT: v_readlane_b32 s46, v35, 11 -; GCN-O0-NEXT: v_readlane_b32 s47, v35, 12 -; GCN-O0-NEXT: v_readlane_b32 s48, v35, 13 -; GCN-O0-NEXT: v_readlane_b32 s49, v35, 14 -; GCN-O0-NEXT: v_readlane_b32 s50, v35, 15 -; GCN-O0-NEXT: v_readlane_b32 s51, v35, 16 -; GCN-O0-NEXT: v_readlane_b32 s52, v35, 17 -; GCN-O0-NEXT: v_readlane_b32 s53, v35, 18 -; GCN-O0-NEXT: v_readlane_b32 s54, v35, 19 -; GCN-O0-NEXT: v_readlane_b32 s55, v35, 20 -; GCN-O0-NEXT: v_readlane_b32 s56, v35, 21 -; GCN-O0-NEXT: v_readlane_b32 s57, v35, 22 -; GCN-O0-NEXT: v_readlane_b32 s58, v35, 23 -; GCN-O0-NEXT: v_readlane_b32 s59, v35, 24 -; GCN-O0-NEXT: v_readlane_b32 s60, v35, 25 -; GCN-O0-NEXT: v_readlane_b32 s61, v35, 26 -; GCN-O0-NEXT: v_readlane_b32 s62, v35, 27 -; GCN-O0-NEXT: v_readlane_b32 s63, v35, 28 -; GCN-O0-NEXT: v_readlane_b32 s64, v35, 29 -; GCN-O0-NEXT: v_readlane_b32 s65, v35, 30 -; GCN-O0-NEXT: v_readlane_b32 s66, v35, 31 -; GCN-O0-NEXT: v_readlane_b32 s67, v35, 32 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s37 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s38 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s39 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s40 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s41 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s42 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s43 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s44 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s45 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s46 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s47 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s48 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s49 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s50 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s51 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s52 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s54 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s55 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s56 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s57 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s58 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s59 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s60 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s61 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s62 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s63 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s64 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s65 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s66 -; GCN-O0-NEXT: v_mov_b32_e32 v31, s67 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v35, s4, 37 -; GCN-O0-NEXT: v_writelane_b32 v35, s5, 38 -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_store_dword v35, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB21_4: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v35, 39 -; GCN-O0-NEXT: v_readlane_b32 s5, v35, 40 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v32 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v32 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v35, s6, 39 -; GCN-O0-NEXT: v_writelane_b32 v35, s7, 40 -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_store_dword v35, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB21_4 -; GCN-O0-NEXT: ; %bb.5: -; GCN-O0-NEXT: s_or_saveexec_b64 s[78:79], -1 -; GCN-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[78:79] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v35, 37 -; GCN-O0-NEXT: v_readlane_b32 s5, v35, 38 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.6: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: ; implicit-def: $sgpr5 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v3 -; GCN-O0-NEXT: s_mov_b32 s4, 32 -; GCN-O0-NEXT: v_lshrrev_b64 v[1:2], s4, v[1:2] -; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec -; GCN-O0-NEXT: v_readlane_b32 s67, v34, 15 -; GCN-O0-NEXT: v_readlane_b32 s66, v34, 14 -; GCN-O0-NEXT: v_readlane_b32 s65, v34, 13 -; GCN-O0-NEXT: v_readlane_b32 s64, v34, 12 -; GCN-O0-NEXT: v_readlane_b32 s55, v34, 11 -; GCN-O0-NEXT: v_readlane_b32 s54, v34, 10 -; GCN-O0-NEXT: v_readlane_b32 s53, v34, 9 -; GCN-O0-NEXT: v_readlane_b32 s52, v34, 8 -; GCN-O0-NEXT: v_readlane_b32 s51, v34, 7 -; GCN-O0-NEXT: v_readlane_b32 s50, v34, 6 -; GCN-O0-NEXT: v_readlane_b32 s49, v34, 5 -; GCN-O0-NEXT: v_readlane_b32 s48, v34, 4 -; GCN-O0-NEXT: v_readlane_b32 s39, v34, 3 -; GCN-O0-NEXT: v_readlane_b32 s38, v34, 2 -; GCN-O0-NEXT: v_readlane_b32 s37, v34, 1 -; GCN-O0-NEXT: v_readlane_b32 s36, v34, 0 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <16 x double> , i32 %sel ret double %ext } - -define i32 @extract_dyn_i32_3(<3 x i32> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_i32_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_i32_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v5, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v5, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB22_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 3 -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v3 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v3 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v5, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v5, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB22_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <3 x i32> %arg, i32 %idx - ret i32 %x -} - -define i32 @extract_dyn_inreg_i32_3(<3 x i32> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_i32_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s19, 1 -; GCN-NEXT: s_cselect_b32 s4, s17, s16 -; GCN-NEXT: s_cmp_eq_u32 s19, 2 -; GCN-NEXT: s_cselect_b32 s4, s18, s4 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_i32_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: s_mov_b32 m0, s19 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <3 x i32> %arg, i32 %idx - ret i32 %x -} - -define float @extract_dyn_float_3(<3 x float> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_float_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_float_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v5, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v5, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB24_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 3 -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v3 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v3 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v5, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v5, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB24_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <3 x float> %arg, i32 %idx - ret float %x -} - -define float @extract_dyn_inreg_float_3(<3 x float> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_float_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s19, 1 -; GCN-NEXT: v_mov_b32_e32 v0, s16 -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s19, 2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_float_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: s_mov_b32 m0, s19 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <3 x float> %arg, i32 %idx - ret float %x -} - -define i32 @extract_dyn_i32_5(<5 x i32> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_i32_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_i32_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v7, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v7, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB26_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v7, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v7, 3 -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v5 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v5 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v7, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v7, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB26_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v7, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v7, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <5 x i32> %arg, i32 %idx - ret i32 %x -} - -define i32 @extract_dyn_inreg_i32_5(<5 x i32> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_i32_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s21, 1 -; GCN-NEXT: s_cselect_b32 s4, s17, s16 -; GCN-NEXT: s_cmp_eq_u32 s21, 2 -; GCN-NEXT: s_cselect_b32 s4, s18, s4 -; GCN-NEXT: s_cmp_eq_u32 s21, 3 -; GCN-NEXT: s_cselect_b32 s4, s19, s4 -; GCN-NEXT: s_cmp_eq_u32 s21, 4 -; GCN-NEXT: s_cselect_b32 s4, s20, s4 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_i32_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: s_mov_b32 m0, s21 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <5 x i32> %arg, i32 %idx - ret i32 %x -} - -define float @extract_dyn_float_5(<5 x float> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_float_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_float_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v7, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v7, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v7, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v7, 3 -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v5 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v5 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v7, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v7, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB28_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v7, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v7, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <5 x float> %arg, i32 %idx - ret float %x -} - -define float @extract_dyn_inreg_float_5(<5 x float> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_float_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s21, 1 -; GCN-NEXT: v_mov_b32_e32 v0, s16 -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 3 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 4 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_float_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: s_mov_b32 m0, s21 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <5 x float> %arg, i32 %idx - ret float %x -} - -define i32 @extract_dyn_i32_6(<6 x i32> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_i32_6: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_i32_6: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v8, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v8, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB30_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v8, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v8, 3 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v6 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v6 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v8, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v8, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB30_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v8, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v8, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] -entry: - %x = extractelement <6 x i32> %arg, i32 %idx - ret i32 %x -} - -define i32 @extract_dyn_inreg_i32_6(<6 x i32> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_i32_6: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s22, 1 -; GCN-NEXT: s_cselect_b32 s4, s17, s16 -; GCN-NEXT: s_cmp_eq_u32 s22, 2 -; GCN-NEXT: s_cselect_b32 s4, s18, s4 -; GCN-NEXT: s_cmp_eq_u32 s22, 3 -; GCN-NEXT: s_cselect_b32 s4, s19, s4 -; GCN-NEXT: s_cmp_eq_u32 s22, 4 -; GCN-NEXT: s_cselect_b32 s4, s20, s4 -; GCN-NEXT: s_cmp_eq_u32 s22, 5 -; GCN-NEXT: s_cselect_b32 s4, s21, s4 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_i32_6: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: s_mov_b32 m0, s22 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] -entry: - %x = extractelement <6 x i32> %arg, i32 %idx - ret i32 %x -} - -define float @extract_dyn_float_6(<6 x float> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_float_6: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_float_6: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v8, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v8, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB32_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v8, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v8, 3 -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v6 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v6 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v8, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v8, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB32_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v8, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v8, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] -entry: - %x = extractelement <6 x float> %arg, i32 %idx - ret float %x -} - -define float @extract_dyn_inreg_float_6(<6 x float> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_float_6: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s22, 1 -; GCN-NEXT: v_mov_b32_e32 v0, s16 -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 3 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 4 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 5 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s21 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_float_6: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: s_mov_b32 m0, s22 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] -entry: - %x = extractelement <6 x float> %arg, i32 %idx - ret float %x -} - -define i32 @extract_dyn_i32_7(<7 x i32> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_i32_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s22 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_i32_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr9 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v9, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v9, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB34_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 3 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v7 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v9, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v9, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB34_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <7 x i32> %arg, i32 %idx - ret i32 %x -} - -define i32 @extract_dyn_inreg_i32_7(<7 x i32> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_i32_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s23, 1 -; GCN-NEXT: s_cselect_b32 s4, s17, s16 -; GCN-NEXT: s_cmp_eq_u32 s23, 2 -; GCN-NEXT: s_cselect_b32 s4, s18, s4 -; GCN-NEXT: s_cmp_eq_u32 s23, 3 -; GCN-NEXT: s_cselect_b32 s4, s19, s4 -; GCN-NEXT: s_cmp_eq_u32 s23, 4 -; GCN-NEXT: s_cselect_b32 s4, s20, s4 -; GCN-NEXT: s_cmp_eq_u32 s23, 5 -; GCN-NEXT: s_cselect_b32 s4, s21, s4 -; GCN-NEXT: s_cmp_eq_u32 s23, 6 -; GCN-NEXT: s_cselect_b32 s4, s22, s4 -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_i32_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: s_mov_b32 m0, s23 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <7 x i32> %arg, i32 %idx - ret i32 %x -} - -define float @extract_dyn_float_7(<7 x float> inreg %arg, i32 %idx) { -; GCN-LABEL: extract_dyn_float_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s22 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_float_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr9 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v9, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v9, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: ; implicit-def: $vgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB36_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 3 -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v7 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v9, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v9, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB36_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <7 x float> %arg, i32 %idx - ret float %x -} - -define float @extract_dyn_inreg_float_7(<7 x float> inreg %arg, i32 inreg %idx) { -; GCN-LABEL: extract_dyn_inreg_float_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s23, 1 -; GCN-NEXT: v_mov_b32_e32 v0, s16 -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 2 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 3 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 4 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 5 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s21 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 6 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s22 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: extract_dyn_inreg_float_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: s_mov_b32 m0, s23 -; GCN-O0-NEXT: v_movrels_b32_e32 v0, v0 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = extractelement <7 x float> %arg, i32 %idx - ret float %x -} diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index beeeaa32cacfd..e1b4cad370f96 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s -; RUN: llc -O0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck --check-prefixes=GCN-O0 %s define amdgpu_kernel void @float4_inselt(ptr addrspace(1) %out, <4 x float> %vec, i32 %sel) { ; GCN-LABEL: float4_inselt: @@ -29,25 +28,6 @@ define amdgpu_kernel void @float4_inselt(ptr addrspace(1) %out, <4 x float> %vec ; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float4_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34 -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0x44 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s7 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <4 x float> %vec, float 1.000000e+00, i32 %sel store <4 x float> %v, ptr addrspace(1) %out @@ -67,24 +47,6 @@ define amdgpu_kernel void @float4_inselt_undef(ptr addrspace(1) %out, i32 %sel) ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float4_inselt_undef: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x2c -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s7 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <4 x float> poison, float 1.000000e+00, i32 %sel store <4 x float> %v, ptr addrspace(1) %out @@ -114,25 +76,6 @@ define amdgpu_kernel void @int4_inselt(ptr addrspace(1) %out, <4 x i32> %vec, i3 ; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: int4_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34 -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0x44 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s7 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <4 x i32> %vec, i32 1, i32 %sel store <4 x i32> %v, ptr addrspace(1) %out @@ -157,23 +100,6 @@ define amdgpu_kernel void @float2_inselt(ptr addrspace(1) %out, <2 x float> %vec ; GCN-NEXT: v_mov_b32_e32 v3, s5 ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float2_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x2c -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0x34 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s5 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <2 x float> %vec, float 1.000000e+00, i32 %sel store <2 x float> %v, ptr addrspace(1) %out @@ -207,57 +133,6 @@ define amdgpu_kernel void @float8_inselt(ptr addrspace(1) %out, <8 x float> %vec ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float8_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x44 -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0x64 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v14, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s4 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v11 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <8 x float> %vec, float 1.000000e+00, i32 %sel store <8 x float> %v, ptr addrspace(1) %out @@ -311,105 +186,6 @@ define amdgpu_kernel void @float16_inselt(ptr addrspace(1) %out, <16 x float> %v ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float16_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x64 -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0xa4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v22, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s4 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v19 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 48 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v15 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 32 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v11 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <16 x float> %vec, float 1.000000e+00, i32 %sel store <16 x float> %v, ptr addrspace(1) %out @@ -504,267 +280,6 @@ define amdgpu_kernel void @float32_inselt(ptr addrspace(1) %out, <32 x float> %v ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: float32_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xe4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s2, s51 -; GCN-O0-NEXT: s_mov_b32 s3, s50 -; GCN-O0-NEXT: s_mov_b32 s6, s49 -; GCN-O0-NEXT: s_mov_b32 s7, s48 -; GCN-O0-NEXT: s_mov_b32 s8, s47 -; GCN-O0-NEXT: s_mov_b32 s9, s46 -; GCN-O0-NEXT: s_mov_b32 s10, s45 -; GCN-O0-NEXT: s_mov_b32 s11, s44 -; GCN-O0-NEXT: s_mov_b32 s12, s43 -; GCN-O0-NEXT: s_mov_b32 s13, s42 -; GCN-O0-NEXT: s_mov_b32 s14, s41 -; GCN-O0-NEXT: s_mov_b32 s15, s40 -; GCN-O0-NEXT: s_mov_b32 s16, s39 -; GCN-O0-NEXT: s_mov_b32 s17, s38 -; GCN-O0-NEXT: s_mov_b32 s18, s37 -; GCN-O0-NEXT: s_mov_b32 s19, s36 -; GCN-O0-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s20, s51 -; GCN-O0-NEXT: s_mov_b32 s21, s50 -; GCN-O0-NEXT: s_mov_b32 s22, s49 -; GCN-O0-NEXT: s_mov_b32 s23, s48 -; GCN-O0-NEXT: s_mov_b32 s24, s47 -; GCN-O0-NEXT: s_mov_b32 s25, s46 -; GCN-O0-NEXT: s_mov_b32 s26, s45 -; GCN-O0-NEXT: s_mov_b32 s27, s44 -; GCN-O0-NEXT: s_mov_b32 s28, s43 -; GCN-O0-NEXT: s_mov_b32 s29, s42 -; GCN-O0-NEXT: s_mov_b32 s30, s41 -; GCN-O0-NEXT: s_mov_b32 s31, s40 -; GCN-O0-NEXT: s_mov_b32 s33, s39 -; GCN-O0-NEXT: s_mov_b32 s34, s38 -; GCN-O0-NEXT: s_mov_b32 s35, s37 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 killed $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v62, s35 -; GCN-O0-NEXT: v_mov_b32_e32 v61, s34 -; GCN-O0-NEXT: v_mov_b32_e32 v60, s33 -; GCN-O0-NEXT: v_mov_b32_e32 v59, s31 -; GCN-O0-NEXT: v_mov_b32_e32 v58, s30 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s29 -; GCN-O0-NEXT: v_mov_b32_e32 v56, s28 -; GCN-O0-NEXT: v_mov_b32_e32 v55, s27 -; GCN-O0-NEXT: v_mov_b32_e32 v54, s26 -; GCN-O0-NEXT: v_mov_b32_e32 v53, s25 -; GCN-O0-NEXT: v_mov_b32_e32 v52, s24 -; GCN-O0-NEXT: v_mov_b32_e32 v51, s23 -; GCN-O0-NEXT: v_mov_b32_e32 v50, s22 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s21 -; GCN-O0-NEXT: v_mov_b32_e32 v48, s20 -; GCN-O0-NEXT: v_mov_b32_e32 v47, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v46, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v45, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v44, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v43, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v42, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v40, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v39, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v8, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v61 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v60 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v59 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v58 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v56 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v55 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v51 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v50 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v48 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v32, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v33, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v34, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v35, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v36, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v37, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v38, v0 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x124 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1.0 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v38 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v37 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v35 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x70 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v32 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v31 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x60 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v27 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x50 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v23 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 64 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v19 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 48 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v15 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 32 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v11 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel store <32 x float> %v, ptr addrspace(1) %out @@ -790,30 +305,6 @@ define amdgpu_kernel void @half4_inselt(ptr addrspace(1) %out, <4 x half> %vec, ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: half4_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[0:1], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c -; GCN-O0-NEXT: s_load_dword s6, s[0:1], 0x34 -; GCN-O0-NEXT: s_mov_b32 s7, 0x3c003c00 -; GCN-O0-NEXT: s_mov_b32 s0, s7 -; GCN-O0-NEXT: s_mov_b32 s1, s7 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] -; GCN-O0-NEXT: s_mov_b32 s7, 4 -; GCN-O0-NEXT: s_lshl_b32 s8, s6, s7 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0xffff -; GCN-O0-NEXT: s_lshl_b64 s[6:7], s[6:7], s8 -; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7] -; GCN-O0-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <4 x half> %vec, half 1.000000e+00, i32 %sel store <4 x half> %v, ptr addrspace(1) %out @@ -835,26 +326,6 @@ define amdgpu_kernel void @half2_inselt(ptr addrspace(1) %out, <2 x half> %vec, ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: half2_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s1, s[4:5], 0x2c -; GCN-O0-NEXT: s_load_dword s4, s[4:5], 0x30 -; GCN-O0-NEXT: s_mov_b32 s0, 0x3c003c00 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_xor_b32 s0, s1, s0 -; GCN-O0-NEXT: s_mov_b32 s5, 4 -; GCN-O0-NEXT: s_lshl_b32 s5, s4, s5 -; GCN-O0-NEXT: s_mov_b32 s4, 0xffff -; GCN-O0-NEXT: s_lshl_b32 s4, s4, s5 -; GCN-O0-NEXT: s_and_b32 s0, s0, s4 -; GCN-O0-NEXT: s_xor_b32 s0, s0, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <2 x half> %vec, half 1.000000e+00, i32 %sel store <2 x half> %v, ptr addrspace(1) %out @@ -916,56 +387,6 @@ define amdgpu_kernel void @half8_inselt(ptr addrspace(1) %out, <8 x half> %vec, ; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: half8_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s14, -1 -; GCN-O0-NEXT: s_mov_b32 s15, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s12, s12, s11 -; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34 -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0x44 -; GCN-O0-NEXT: s_mov_b32 s3, 7 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_and_b32 s2, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_lshl_b32 s3, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s2, 0 -; GCN-O0-NEXT: s_or_b32 s2, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:12 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:8 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:4 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 0x3c00 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s2 -; GCN-O0-NEXT: buffer_store_short v0, v1, s[12:15], 0 offen -; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 -; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 offset:4 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <8 x half> %vec, half 1.000000e+00, i32 %sel store <8 x half> %v, ptr addrspace(1) %out @@ -987,26 +408,6 @@ define amdgpu_kernel void @short2_inselt(ptr addrspace(1) %out, <2 x i16> %vec, ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: short2_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s1, s[4:5], 0x2c -; GCN-O0-NEXT: s_load_dword s4, s[4:5], 0x30 -; GCN-O0-NEXT: s_mov_b32 s0, 0x10001 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_xor_b32 s0, s1, s0 -; GCN-O0-NEXT: s_mov_b32 s5, 4 -; GCN-O0-NEXT: s_lshl_b32 s5, s4, s5 -; GCN-O0-NEXT: s_mov_b32 s4, 0xffff -; GCN-O0-NEXT: s_lshl_b32 s4, s4, s5 -; GCN-O0-NEXT: s_and_b32 s0, s0, s4 -; GCN-O0-NEXT: s_xor_b32 s0, s0, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_dword v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <2 x i16> %vec, i16 1, i32 %sel store <2 x i16> %v, ptr addrspace(1) %out @@ -1032,30 +433,6 @@ define amdgpu_kernel void @short4_inselt(ptr addrspace(1) %out, <4 x i16> %vec, ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: short4_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[0:1], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c -; GCN-O0-NEXT: s_load_dword s6, s[0:1], 0x34 -; GCN-O0-NEXT: s_mov_b32 s7, 0x10001 -; GCN-O0-NEXT: s_mov_b32 s0, s7 -; GCN-O0-NEXT: s_mov_b32 s1, s7 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] -; GCN-O0-NEXT: s_mov_b32 s7, 4 -; GCN-O0-NEXT: s_lshl_b32 s8, s6, s7 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0xffff -; GCN-O0-NEXT: s_lshl_b64 s[6:7], s[6:7], s8 -; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7] -; GCN-O0-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <4 x i16> %vec, i16 1, i32 %sel store <4 x i16> %v, ptr addrspace(1) %out @@ -1080,140 +457,6 @@ define amdgpu_kernel void @byte8_inselt(ptr addrspace(1) %out, <8 x i8> %vec, i3 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: byte8_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b64 s[0:1], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GCN-O0-NEXT: s_load_dword s6, s[0:1], 0x34 -; GCN-O0-NEXT: s_mov_b32 s7, 0x1010101 -; GCN-O0-NEXT: s_mov_b32 s0, s7 -; GCN-O0-NEXT: s_mov_b32 s1, s7 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1] -; GCN-O0-NEXT: s_mov_b32 s7, 3 -; GCN-O0-NEXT: s_lshl_b32 s8, s6, s7 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0xff -; GCN-O0-NEXT: s_lshl_b64 s[6:7], s[6:7], s8 -; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7] -; GCN-O0-NEXT: s_xor_b64 s[10:11], s[0:1], s[2:3] -; GCN-O0-NEXT: s_mov_b32 s3, s10 -; GCN-O0-NEXT: s_mov_b32 s0, 8 -; GCN-O0-NEXT: s_lshr_b32 s0, s3, s0 -; GCN-O0-NEXT: s_mov_b32 s1, s10 -; GCN-O0-NEXT: s_mov_b32 s2, 16 -; GCN-O0-NEXT: s_lshr_b32 s2, s3, s2 -; GCN-O0-NEXT: s_mov_b32 s6, 24 -; GCN-O0-NEXT: s_lshr_b32 s3, s3, s6 -; GCN-O0-NEXT: s_mov_b32 s6, 32 -; GCN-O0-NEXT: s_lshr_b64 s[6:7], s[10:11], s6 -; GCN-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 -; GCN-O0-NEXT: s_mov_b32 s7, 40 -; GCN-O0-NEXT: s_lshr_b64 s[8:9], s[10:11], s7 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s8, 48 -; GCN-O0-NEXT: s_lshr_b64 s[8:9], s[10:11], s8 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, 56 -; GCN-O0-NEXT: s_lshr_b64 s[10:11], s[10:11], s9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: s_mov_b64 s[14:15], 7 -; GCN-O0-NEXT: s_mov_b32 s10, s4 -; GCN-O0-NEXT: s_mov_b32 s11, s5 -; GCN-O0-NEXT: s_mov_b32 s13, s14 -; GCN-O0-NEXT: s_mov_b32 s12, s15 -; GCN-O0-NEXT: s_add_u32 s10, s10, s13 -; GCN-O0-NEXT: s_addc_u32 s12, s11, s12 -; GCN-O0-NEXT: ; kill: def $sgpr10 killed $sgpr10 def $sgpr10_sgpr11 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_mov_b64 s[14:15], 6 -; GCN-O0-NEXT: s_mov_b32 s10, s4 -; GCN-O0-NEXT: s_mov_b32 s9, s5 -; GCN-O0-NEXT: s_mov_b32 s12, s14 -; GCN-O0-NEXT: s_mov_b32 s11, s15 -; GCN-O0-NEXT: s_add_u32 s10, s10, s12 -; GCN-O0-NEXT: s_addc_u32 s9, s9, s11 -; GCN-O0-NEXT: ; kill: def $sgpr10 killed $sgpr10 def $sgpr10_sgpr11 -; GCN-O0-NEXT: s_mov_b32 s11, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s8 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 5 -; GCN-O0-NEXT: s_mov_b32 s8, s4 -; GCN-O0-NEXT: s_mov_b32 s9, s5 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s7 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 4 -; GCN-O0-NEXT: s_mov_b32 s8, s4 -; GCN-O0-NEXT: s_mov_b32 s7, s5 -; GCN-O0-NEXT: s_mov_b32 s10, s12 -; GCN-O0-NEXT: s_mov_b32 s9, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s10 -; GCN-O0-NEXT: s_addc_u32 s7, s7, s9 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_mov_b64 s[10:11], 3 -; GCN-O0-NEXT: s_mov_b32 s6, s4 -; GCN-O0-NEXT: s_mov_b32 s7, s5 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: s_mov_b32 s8, s11 -; GCN-O0-NEXT: s_add_u32 s6, s6, s9 -; GCN-O0-NEXT: s_addc_u32 s8, s7, s8 -; GCN-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_mov_b64 s[10:11], 2 -; GCN-O0-NEXT: s_mov_b32 s6, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s8, s10 -; GCN-O0-NEXT: s_mov_b32 s7, s11 -; GCN-O0-NEXT: s_add_u32 s6, s6, s8 -; GCN-O0-NEXT: s_addc_u32 s3, s3, s7 -; GCN-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 -; GCN-O0-NEXT: s_mov_b32 s7, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s2 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s1 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 1 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s4, s6 -; GCN-O0-NEXT: s_mov_b32 s3, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s4 -; GCN-O0-NEXT: s_addc_u32 s1, s1, s3 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <8 x i8> %vec, i8 1, i32 %sel store <8 x i8> %v, ptr addrspace(1) %out @@ -1315,435 +558,6 @@ define amdgpu_kernel void @byte16_inselt(ptr addrspace(1) %out, <16 x i8> %vec, ; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: byte16_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s14, -1 -; GCN-O0-NEXT: s_mov_b32 s15, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s12, s12, s11 -; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 52 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 53 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s0 -; GCN-O0-NEXT: flat_load_ubyte v1, v[1:2] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 54 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s0 -; GCN-O0-NEXT: flat_load_ubyte v2, v[2:3] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 55 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s0 -; GCN-O0-NEXT: flat_load_ubyte v3, v[3:4] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 56 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s0 -; GCN-O0-NEXT: flat_load_ubyte v4, v[4:5] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 57 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s0 -; GCN-O0-NEXT: flat_load_ubyte v5, v[5:6] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 58 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s0 -; GCN-O0-NEXT: flat_load_ubyte v6, v[6:7] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 59 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s0 -; GCN-O0-NEXT: flat_load_ubyte v7, v[7:8] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 60 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s0 -; GCN-O0-NEXT: flat_load_ubyte v8, v[8:9] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 61 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s0 -; GCN-O0-NEXT: flat_load_ubyte v9, v[9:10] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 62 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s0 -; GCN-O0-NEXT: flat_load_ubyte v10, v[10:11] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 63 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s0 -; GCN-O0-NEXT: flat_load_ubyte v11, v[11:12] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 64 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s0 -; GCN-O0-NEXT: flat_load_ubyte v12, v[12:13] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x41 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s0 -; GCN-O0-NEXT: flat_load_ubyte v13, v[13:14] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x42 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s0 -; GCN-O0-NEXT: flat_load_ubyte v14, v[14:15] -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x43 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s0 -; GCN-O0-NEXT: flat_load_ubyte v15, v[15:16] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x44 -; GCN-O0-NEXT: s_mov_b32 s3, 15 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_and_b32 s3, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s2, 0 -; GCN-O0-NEXT: s_or_b32 s2, s2, s3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v15, off, s[12:15], 0 offset:15 -; GCN-O0-NEXT: buffer_store_byte v14, off, s[12:15], 0 offset:14 -; GCN-O0-NEXT: buffer_store_byte v13, off, s[12:15], 0 offset:13 -; GCN-O0-NEXT: buffer_store_byte v12, off, s[12:15], 0 offset:12 -; GCN-O0-NEXT: buffer_store_byte v11, off, s[12:15], 0 offset:11 -; GCN-O0-NEXT: buffer_store_byte v10, off, s[12:15], 0 offset:10 -; GCN-O0-NEXT: buffer_store_byte v9, off, s[12:15], 0 offset:9 -; GCN-O0-NEXT: buffer_store_byte v8, off, s[12:15], 0 offset:8 -; GCN-O0-NEXT: buffer_store_byte v7, off, s[12:15], 0 offset:7 -; GCN-O0-NEXT: buffer_store_byte v6, off, s[12:15], 0 offset:6 -; GCN-O0-NEXT: buffer_store_byte v5, off, s[12:15], 0 offset:5 -; GCN-O0-NEXT: buffer_store_byte v4, off, s[12:15], 0 offset:4 -; GCN-O0-NEXT: buffer_store_byte v3, off, s[12:15], 0 offset:3 -; GCN-O0-NEXT: buffer_store_byte v2, off, s[12:15], 0 offset:2 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[12:15], 0 offset:1 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s2 -; GCN-O0-NEXT: buffer_store_byte v0, v1, s[12:15], 0 offen -; GCN-O0-NEXT: buffer_load_ubyte v2, off, s[12:15], 0 -; GCN-O0-NEXT: buffer_load_ubyte v3, off, s[12:15], 0 offset:1 -; GCN-O0-NEXT: buffer_load_ubyte v4, off, s[12:15], 0 offset:2 -; GCN-O0-NEXT: buffer_load_ubyte v5, off, s[12:15], 0 offset:3 -; GCN-O0-NEXT: buffer_load_ubyte v6, off, s[12:15], 0 offset:4 -; GCN-O0-NEXT: buffer_load_ubyte v7, off, s[12:15], 0 offset:5 -; GCN-O0-NEXT: buffer_load_ubyte v8, off, s[12:15], 0 offset:6 -; GCN-O0-NEXT: buffer_load_ubyte v9, off, s[12:15], 0 offset:7 -; GCN-O0-NEXT: buffer_load_ubyte v10, off, s[12:15], 0 offset:8 -; GCN-O0-NEXT: buffer_load_ubyte v11, off, s[12:15], 0 offset:9 -; GCN-O0-NEXT: buffer_load_ubyte v12, off, s[12:15], 0 offset:10 -; GCN-O0-NEXT: buffer_load_ubyte v13, off, s[12:15], 0 offset:11 -; GCN-O0-NEXT: buffer_load_ubyte v14, off, s[12:15], 0 offset:12 -; GCN-O0-NEXT: buffer_load_ubyte v15, off, s[12:15], 0 offset:13 -; GCN-O0-NEXT: buffer_load_ubyte v16, off, s[12:15], 0 offset:14 -; GCN-O0-NEXT: buffer_load_ubyte v17, off, s[12:15], 0 offset:15 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 15 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: flat_store_byte v[0:1], v17 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 14 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v16 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 13 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v15 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 12 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v14 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 11 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v13 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 10 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v12 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 9 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v11 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 8 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v10 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 7 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v9 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 6 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v8 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 5 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v7 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 4 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v6 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 3 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v5 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 2 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v4 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 1 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_byte v[0:1], v3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <16 x i8> %vec, i8 1, i32 %sel store <16 x i8> %v, ptr addrspace(1) %out @@ -1771,32 +585,6 @@ define amdgpu_kernel void @double2_inselt(ptr addrspace(1) %out, <2 x double> %v ; GCN-NEXT: v_mov_b32_e32 v5, s5 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double2_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x44 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v3, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <2 x double> %vec, double 1.000000e+00, i32 %sel store <2 x double> %v, ptr addrspace(1) %out @@ -1851,129 +639,6 @@ define amdgpu_kernel void @double5_inselt(ptr addrspace(1) %out, <5 x double> %v ; GCN-NEXT: v_mov_b32_e32 v2, s0 ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double5_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x84 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s10, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s0 -; GCN-O0-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x64 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s12, s27 -; GCN-O0-NEXT: s_mov_b32 s13, s26 -; GCN-O0-NEXT: s_mov_b32 s14, s25 -; GCN-O0-NEXT: s_mov_b32 s15, s24 -; GCN-O0-NEXT: s_mov_b32 s16, s23 -; GCN-O0-NEXT: s_mov_b32 s17, s22 -; GCN-O0-NEXT: s_mov_b32 s18, s21 -; GCN-O0-NEXT: s_mov_b32 s19, s20 -; GCN-O0-NEXT: ; implicit-def: $sgpr9 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr8 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr7 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr6 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr1 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr20 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v2, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v27 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v23 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v19 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v0 -; GCN-O0-NEXT: s_load_dword s0, s[4:5], 0xa4 -; GCN-O0-NEXT: s_mov_b32 s1, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s0, s0, s1 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s1 -; GCN-O0-NEXT: s_mov_b32 m0, s0 -; GCN-O0-NEXT: v_movreld_b32_e32 v1, v0 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s1 -; GCN-O0-NEXT: s_mov_b32 m0, s0 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v5 -; GCN-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23_vgpr24_vgpr25 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v23, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v20 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[20:21], v[22:25] -; GCN-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20_vgpr21_vgpr22 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v20, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s2 -; GCN-O0-NEXT: flat_store_dwordx4 v[17:18], v[19:22] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v9 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v0 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 32 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: s_mov_b32 s2, s5 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx2 v[0:1], v[2:3] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <5 x double> %vec, double 1.000000e+00, i32 %sel store <5 x double> %v, ptr addrspace(1) %out @@ -2029,112 +694,6 @@ define amdgpu_kernel void @double8_inselt(ptr addrspace(1) %out, <8 x double> %v ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double8_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0xa4 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v13, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v14, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v15, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v19, s20 -; GCN-O0-NEXT: v_mov_b32_e32 v20, s21 -; GCN-O0-NEXT: v_mov_b32_e32 v21, s22 -; GCN-O0-NEXT: v_mov_b32_e32 v22, s23 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v8, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v19 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 48 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v15 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 32 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v11 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel store <8 x double> %v, ptr addrspace(1) %out @@ -2188,147 +747,6 @@ define amdgpu_kernel void @double7_inselt(ptr addrspace(1) %out, <7 x double> %v ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dwordx4 v[0:1], v[8:11] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double7_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x94 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s6, s1 -; GCN-O0-NEXT: s_mov_b32 s7, s0 -; GCN-O0-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x84 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s8, s15 -; GCN-O0-NEXT: s_mov_b32 s9, s14 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x64 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s12, s27 -; GCN-O0-NEXT: s_mov_b32 s13, s26 -; GCN-O0-NEXT: s_mov_b32 s14, s25 -; GCN-O0-NEXT: s_mov_b32 s15, s24 -; GCN-O0-NEXT: s_mov_b32 s16, s23 -; GCN-O0-NEXT: s_mov_b32 s17, s22 -; GCN-O0-NEXT: s_mov_b32 s18, s21 -; GCN-O0-NEXT: s_mov_b32 s19, s20 -; GCN-O0-NEXT: ; implicit-def: $sgpr1 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr20 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v30, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v29, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v28, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v27, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v23, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v8, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v27 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v23 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v0 -; GCN-O0-NEXT: s_load_dword s0, s[4:5], 0xa4 -; GCN-O0-NEXT: s_mov_b32 s1, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s0, s0, s1 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s1 -; GCN-O0-NEXT: s_mov_b32 m0, s0 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s1 -; GCN-O0-NEXT: s_mov_b32 m0, s0 -; GCN-O0-NEXT: v_movreld_b32_e32 v8, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v15 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v11 -; GCN-O0-NEXT: ; kill: def $vgpr26 killed $vgpr26 def $vgpr26_vgpr27_vgpr28_vgpr29 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v27, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v24 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v24, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[24:25], v[26:29] -; GCN-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24_vgpr25_vgpr26 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v24, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s2 -; GCN-O0-NEXT: flat_store_dwordx4 v[3:4], v[23:26] -; GCN-O0-NEXT: v_mov_b32_e32 v3, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v19 -; GCN-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v8, v3 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 48 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s0 -; GCN-O0-NEXT: flat_store_dwordx2 v[3:4], v[7:8] -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 32 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: s_mov_b32 s2, s5 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <7 x double> %vec, double 1.000000e+00, i32 %sel store <7 x double> %v, ptr addrspace(1) %out @@ -2426,275 +844,6 @@ define amdgpu_kernel void @double16_inselt(ptr addrspace(1) %out, <16 x double> ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double16_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xe4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s2, s51 -; GCN-O0-NEXT: s_mov_b32 s3, s50 -; GCN-O0-NEXT: s_mov_b32 s6, s49 -; GCN-O0-NEXT: s_mov_b32 s7, s48 -; GCN-O0-NEXT: s_mov_b32 s8, s47 -; GCN-O0-NEXT: s_mov_b32 s9, s46 -; GCN-O0-NEXT: s_mov_b32 s10, s45 -; GCN-O0-NEXT: s_mov_b32 s11, s44 -; GCN-O0-NEXT: s_mov_b32 s12, s43 -; GCN-O0-NEXT: s_mov_b32 s13, s42 -; GCN-O0-NEXT: s_mov_b32 s14, s41 -; GCN-O0-NEXT: s_mov_b32 s15, s40 -; GCN-O0-NEXT: s_mov_b32 s16, s39 -; GCN-O0-NEXT: s_mov_b32 s17, s38 -; GCN-O0-NEXT: s_mov_b32 s18, s37 -; GCN-O0-NEXT: s_mov_b32 s19, s36 -; GCN-O0-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s20, s51 -; GCN-O0-NEXT: s_mov_b32 s21, s50 -; GCN-O0-NEXT: s_mov_b32 s22, s49 -; GCN-O0-NEXT: s_mov_b32 s23, s48 -; GCN-O0-NEXT: s_mov_b32 s24, s47 -; GCN-O0-NEXT: s_mov_b32 s25, s46 -; GCN-O0-NEXT: s_mov_b32 s26, s45 -; GCN-O0-NEXT: s_mov_b32 s27, s44 -; GCN-O0-NEXT: s_mov_b32 s28, s43 -; GCN-O0-NEXT: s_mov_b32 s29, s42 -; GCN-O0-NEXT: s_mov_b32 s30, s41 -; GCN-O0-NEXT: s_mov_b32 s31, s40 -; GCN-O0-NEXT: s_mov_b32 s33, s39 -; GCN-O0-NEXT: s_mov_b32 s34, s38 -; GCN-O0-NEXT: s_mov_b32 s35, s37 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 killed $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v62, s35 -; GCN-O0-NEXT: v_mov_b32_e32 v61, s34 -; GCN-O0-NEXT: v_mov_b32_e32 v60, s33 -; GCN-O0-NEXT: v_mov_b32_e32 v59, s31 -; GCN-O0-NEXT: v_mov_b32_e32 v58, s30 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s29 -; GCN-O0-NEXT: v_mov_b32_e32 v56, s28 -; GCN-O0-NEXT: v_mov_b32_e32 v55, s27 -; GCN-O0-NEXT: v_mov_b32_e32 v54, s26 -; GCN-O0-NEXT: v_mov_b32_e32 v53, s25 -; GCN-O0-NEXT: v_mov_b32_e32 v52, s24 -; GCN-O0-NEXT: v_mov_b32_e32 v51, s23 -; GCN-O0-NEXT: v_mov_b32_e32 v50, s22 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s21 -; GCN-O0-NEXT: v_mov_b32_e32 v48, s20 -; GCN-O0-NEXT: v_mov_b32_e32 v47, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v46, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v45, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v44, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v43, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v42, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v40, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v39, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v8, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v61 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v60 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v59 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v58 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v56 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v55 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v51 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v50 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v48 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v32, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v33, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v34, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v35, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v36, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v37, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v38, v0 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x124 -; GCN-O0-NEXT: s_mov_b32 s3, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s2, s2, s3 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v8, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v38 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v37 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v35 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x70 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v32 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v31 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x60 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v27 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x50 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v23 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 64 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v19 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 48 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v15 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 32 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v11 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s2, s0 -; GCN-O0-NEXT: s_mov_b32 s3, s1 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s2, s2, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s3, s4 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: v_mov_b32_e32 v0, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel store <16 x double> %v, ptr addrspace(1) %out @@ -2790,277 +939,6 @@ define amdgpu_kernel void @double15_inselt(ptr addrspace(1) %out, <15 x double> ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dwordx4 v[0:1], v[24:27] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: double15_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x114 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s6, s1 -; GCN-O0-NEXT: s_mov_b32 s7, s0 -; GCN-O0-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x104 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s8, s15 -; GCN-O0-NEXT: s_mov_b32 s9, s14 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0xe4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s12, s27 -; GCN-O0-NEXT: s_mov_b32 s13, s26 -; GCN-O0-NEXT: s_mov_b32 s14, s25 -; GCN-O0-NEXT: s_mov_b32 s15, s24 -; GCN-O0-NEXT: s_mov_b32 s16, s23 -; GCN-O0-NEXT: s_mov_b32 s17, s22 -; GCN-O0-NEXT: s_mov_b32 s18, s21 -; GCN-O0-NEXT: s_mov_b32 s19, s20 -; GCN-O0-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s20, s51 -; GCN-O0-NEXT: s_mov_b32 s21, s50 -; GCN-O0-NEXT: s_mov_b32 s22, s49 -; GCN-O0-NEXT: s_mov_b32 s23, s48 -; GCN-O0-NEXT: s_mov_b32 s24, s47 -; GCN-O0-NEXT: s_mov_b32 s25, s46 -; GCN-O0-NEXT: s_mov_b32 s26, s45 -; GCN-O0-NEXT: s_mov_b32 s27, s44 -; GCN-O0-NEXT: s_mov_b32 s28, s43 -; GCN-O0-NEXT: s_mov_b32 s29, s42 -; GCN-O0-NEXT: s_mov_b32 s30, s41 -; GCN-O0-NEXT: s_mov_b32 s31, s40 -; GCN-O0-NEXT: s_mov_b32 s33, s39 -; GCN-O0-NEXT: s_mov_b32 s34, s38 -; GCN-O0-NEXT: s_mov_b32 s35, s37 -; GCN-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 killed $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 -; GCN-O0-NEXT: ; implicit-def: $sgpr1 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr0 -; GCN-O0-NEXT: ; implicit-def: $sgpr37 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s36 -; GCN-O0-NEXT: v_mov_b32_e32 v62, s35 -; GCN-O0-NEXT: v_mov_b32_e32 v61, s34 -; GCN-O0-NEXT: v_mov_b32_e32 v60, s33 -; GCN-O0-NEXT: v_mov_b32_e32 v59, s31 -; GCN-O0-NEXT: v_mov_b32_e32 v58, s30 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s29 -; GCN-O0-NEXT: v_mov_b32_e32 v56, s28 -; GCN-O0-NEXT: v_mov_b32_e32 v55, s27 -; GCN-O0-NEXT: v_mov_b32_e32 v54, s26 -; GCN-O0-NEXT: v_mov_b32_e32 v53, s25 -; GCN-O0-NEXT: v_mov_b32_e32 v52, s24 -; GCN-O0-NEXT: v_mov_b32_e32 v51, s23 -; GCN-O0-NEXT: v_mov_b32_e32 v50, s22 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s21 -; GCN-O0-NEXT: v_mov_b32_e32 v48, s20 -; GCN-O0-NEXT: v_mov_b32_e32 v47, s19 -; GCN-O0-NEXT: v_mov_b32_e32 v46, s18 -; GCN-O0-NEXT: v_mov_b32_e32 v45, s17 -; GCN-O0-NEXT: v_mov_b32_e32 v44, s16 -; GCN-O0-NEXT: v_mov_b32_e32 v43, s15 -; GCN-O0-NEXT: v_mov_b32_e32 v42, s14 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s13 -; GCN-O0-NEXT: v_mov_b32_e32 v40, s12 -; GCN-O0-NEXT: v_mov_b32_e32 v39, s11 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v8, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v61 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v60 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v59 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v58 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v56 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v55 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v51 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v50 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v48 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v32, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v33, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v34, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v35, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v36, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v37, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v38, v0 -; GCN-O0-NEXT: s_load_dword s0, s[4:5], 0x124 -; GCN-O0-NEXT: s_mov_b32 s1, 1 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_lshl_b32 s0, s0, s1 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s1 -; GCN-O0-NEXT: s_mov_b32 m0, s0 -; GCN-O0-NEXT: v_movreld_b32_e32 v7, v0 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s1 -; GCN-O0-NEXT: s_mov_b32 m0, s0 -; GCN-O0-NEXT: v_movreld_b32_e32 v8, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v32 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v31 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v39, v23 -; GCN-O0-NEXT: v_mov_b32_e32 v40, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v41, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v46, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v42, v27 -; GCN-O0-NEXT: v_mov_b32_e32 v43, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v44, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v45, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v47, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v48, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v49, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v54, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v50, v11 -; GCN-O0-NEXT: v_mov_b32_e32 v51, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v52, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v53, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v55, v15 -; GCN-O0-NEXT: v_mov_b32_e32 v56, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v57, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v62, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v58, v19 -; GCN-O0-NEXT: ; kill: def $vgpr58 killed $vgpr58 def $vgpr58_vgpr59_vgpr60_vgpr61 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v59, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v60, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v61, v56 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 48 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v56, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[56:57], v[58:61] -; GCN-O0-NEXT: ; kill: def $vgpr55 killed $vgpr55 def $vgpr55_vgpr56_vgpr57_vgpr58 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v56, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v57, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v58, v51 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 32 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v52, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v51, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[51:52], v[55:58] -; GCN-O0-NEXT: ; kill: def $vgpr50 killed $vgpr50 def $vgpr50_vgpr51_vgpr52_vgpr53 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v51, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v52, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v53, v48 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 16 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v48, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[48:49], v[50:53] -; GCN-O0-NEXT: ; kill: def $vgpr47 killed $vgpr47 def $vgpr47_vgpr48_vgpr49_vgpr50 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v48, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v49, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v50, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v44, s3 -; GCN-O0-NEXT: v_mov_b32_e32 v43, s2 -; GCN-O0-NEXT: flat_store_dwordx4 v[43:44], v[47:50] -; GCN-O0-NEXT: ; kill: def $vgpr42 killed $vgpr42 def $vgpr42_vgpr43_vgpr44_vgpr45 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v43, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v44, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v45, v40 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x50 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v40, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[40:41], v[42:45] -; GCN-O0-NEXT: ; kill: def $vgpr39 killed $vgpr39 def $vgpr39_vgpr40_vgpr41_vgpr42 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v40, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v41, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v42, v3 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 64 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s0 -; GCN-O0-NEXT: flat_store_dwordx4 v[3:4], v[39:42] -; GCN-O0-NEXT: v_mov_b32_e32 v3, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v35 -; GCN-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v8, v3 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 0x70 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s5, s6 -; GCN-O0-NEXT: s_mov_b32 s4, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s5 -; GCN-O0-NEXT: s_addc_u32 s4, s1, s4 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s0 -; GCN-O0-NEXT: flat_store_dwordx2 v[3:4], v[7:8] -; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v3, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v0 -; GCN-O0-NEXT: s_mov_b64 s[4:5], 0x60 -; GCN-O0-NEXT: s_mov_b32 s0, s2 -; GCN-O0-NEXT: s_mov_b32 s1, s3 -; GCN-O0-NEXT: s_mov_b32 s3, s4 -; GCN-O0-NEXT: s_mov_b32 s2, s5 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_dwordx4 v[0:1], v[2:5] -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel store <15 x double> %v, ptr addrspace(1) %out @@ -3116,63 +994,6 @@ define amdgpu_kernel void @bit4_inselt(ptr addrspace(1) %out, <4 x i1> %vec, i32 ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_byte v[0:1], v2 ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: bit4_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s14, -1 -; GCN-O0-NEXT: s_mov_b32 s15, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s12, s12, s11 -; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-O0-NEXT: s_load_dword s4, s[2:3], 0x2c -; GCN-O0-NEXT: s_load_dword s2, s[2:3], 0x30 -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_bfe_u32 s3, s4, 0x10001 -; GCN-O0-NEXT: s_bfe_u32 s5, s4, 0x20002 -; GCN-O0-NEXT: s_bfe_u32 s6, s4, 0x10003 -; GCN-O0-NEXT: s_mov_b32 s7, 3 -; GCN-O0-NEXT: s_and_b32 s7, s2, s7 -; GCN-O0-NEXT: s_mov_b32 s2, 0 -; GCN-O0-NEXT: s_or_b32 s2, s2, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:3 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s5 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s3 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[12:15], 0 offset:1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: buffer_store_byte v3, v0, s[12:15], 0 offen -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[12:15], 0 -; GCN-O0-NEXT: buffer_load_ubyte v4, off, s[12:15], 0 offset:1 -; GCN-O0-NEXT: buffer_load_ubyte v2, off, s[12:15], 0 offset:2 -; GCN-O0-NEXT: buffer_load_ubyte v1, off, s[12:15], 0 offset:3 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_and_b32_e64 v4, v4, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v4, v3, v4 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v4 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: s_mov_b32 s2, 2 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s2, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_mov_b32 s2, 3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: s_mov_b32 s2, 15 -; GCN-O0-NEXT: v_and_b32_e64 v2, v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <4 x i1> %vec, i1 1, i32 %sel store <4 x i1> %v, ptr addrspace(1) %out @@ -4015,1599 +1836,6 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: v_mov_b32_e32 v4, s0 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm -; -; GCN-O0-LABEL: bit128_inselt: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s16, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s17, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s18, -1 -; GCN-O0-NEXT: s_mov_b32 s19, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s16, s16, s11 -; GCN-O0-NEXT: s_addc_u32 s17, s17, 0 -; GCN-O0-NEXT: s_mov_b64 s[6:7], 52 -; GCN-O0-NEXT: s_mov_b32 s0, s4 -; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: s_mov_b32 s2, s7 -; GCN-O0-NEXT: s_add_u32 s0, s0, s3 -; GCN-O0-NEXT: s_addc_u32 s2, s1, s2 -; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 -; GCN-O0-NEXT: s_mov_b32 s1, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_mov_b32 s1, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:388 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:648 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v2, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v3, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v4, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v5, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v6, v0, 6, 1 -; GCN-O0-NEXT: s_mov_b32 s0, 7 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v7, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 53 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v8, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v9, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v10, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v11, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v12, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v13, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v14, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v15, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 54 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v16, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v17, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v18, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v19, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v20, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v21, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v22, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v23, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 55 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v24, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v25, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v26, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v27, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v28, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v29, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v30, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v31, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 56 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v32, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v33, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v34, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v35, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v36, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v37, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v38, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v39, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 57 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v40, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v41, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v42, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v43, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v44, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v45, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v46, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v47, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 58 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v48, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v49, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v50, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v51, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v52, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v53, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v54, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v55, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 59 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v56, v0, s1 -; GCN-O0-NEXT: v_bfe_u32 v57, v0, 1, 1 -; GCN-O0-NEXT: v_bfe_u32 v58, v0, 2, 1 -; GCN-O0-NEXT: v_bfe_u32 v59, v0, 3, 1 -; GCN-O0-NEXT: v_bfe_u32 v60, v0, 4, 1 -; GCN-O0-NEXT: v_bfe_u32 v61, v0, 5, 1 -; GCN-O0-NEXT: v_bfe_u32 v62, v0, 6, 1 -; GCN-O0-NEXT: v_lshrrev_b32_e64 v63, s0, v0 -; GCN-O0-NEXT: s_mov_b64 s[8:9], 60 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:392 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:396 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:400 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:404 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:408 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:412 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:416 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:420 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 61 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:424 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:428 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:432 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:436 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:440 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:444 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:448 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:452 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 62 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:456 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:460 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:464 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:468 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:472 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:476 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:480 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:484 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 63 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:488 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:492 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:496 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:500 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:504 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:508 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:512 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:516 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 64 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:520 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:524 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:528 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:532 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:536 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:540 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:544 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:548 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 0x41 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:552 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:556 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:560 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:564 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:568 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:572 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:576 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:580 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 0x42 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:584 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:588 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:592 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:596 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:600 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:604 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_bfe_u32 v1, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:608 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:612 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[8:9], 0x43 -; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: s_mov_b32 s3, s5 -; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_mov_b32 s6, s9 -; GCN-O0-NEXT: s_add_u32 s2, s2, s7 -; GCN-O0-NEXT: s_addc_u32 s6, s3, s6 -; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 -; GCN-O0-NEXT: s_mov_b32 s3, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s3 -; GCN-O0-NEXT: flat_load_ubyte v0, v[0:1] -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:648 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, s1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:616 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_bfe_u32 v0, v0, 1, 1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:620 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_bfe_u32 v0, v0, 2, 1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:624 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_bfe_u32 v0, v0, 3, 1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:628 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_bfe_u32 v0, v0, 4, 1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:632 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_bfe_u32 v0, v0, 5, 1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:636 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_bfe_u32 v0, v0, 6, 1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:640 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:644 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshrrev_b32_e64 v0, s0, v0 -; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GCN-O0-NEXT: s_load_dword s2, s[4:5], 0x44 -; GCN-O0-NEXT: s_mov_b32 s3, 0x7f -; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) -; GCN-O0-NEXT: s_and_b32 s3, s2, s3 -; GCN-O0-NEXT: s_mov_b32 s2, 0 -; GCN-O0-NEXT: s_add_i32 s2, s2, s3 -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:127 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:640 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:126 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:636 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:125 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:632 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:124 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:628 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:123 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:624 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:122 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:620 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:121 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:616 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:120 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:612 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:119 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:608 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:118 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:604 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:117 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:600 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:116 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:596 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:115 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:592 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:114 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:588 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:113 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:584 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:112 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:580 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:111 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:576 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:110 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:572 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:109 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:568 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:108 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:564 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:107 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:560 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:106 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:556 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:105 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:552 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:104 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:548 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:103 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:544 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:102 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:540 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:101 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:536 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:100 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:532 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:99 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:528 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:98 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:524 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:97 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:520 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:96 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:516 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:95 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:512 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:94 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:508 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:93 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:504 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:92 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:500 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:91 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:496 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:90 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:492 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:89 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:488 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:88 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:484 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:87 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:480 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:86 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:476 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:85 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:472 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:84 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:468 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:83 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:464 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:82 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:460 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:81 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:456 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:80 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:452 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:79 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:448 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:78 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:444 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:77 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:440 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:76 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:436 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:75 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:432 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:74 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:428 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:73 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:424 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:72 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:420 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:71 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:416 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:70 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:412 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:69 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:408 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:68 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:404 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:67 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:400 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:66 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:396 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:65 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:392 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:64 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:388 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_store_byte v63, off, s[16:19], 0 offset:63 -; GCN-O0-NEXT: buffer_store_byte v62, off, s[16:19], 0 offset:62 -; GCN-O0-NEXT: buffer_store_byte v61, off, s[16:19], 0 offset:61 -; GCN-O0-NEXT: buffer_store_byte v60, off, s[16:19], 0 offset:60 -; GCN-O0-NEXT: buffer_store_byte v59, off, s[16:19], 0 offset:59 -; GCN-O0-NEXT: buffer_store_byte v58, off, s[16:19], 0 offset:58 -; GCN-O0-NEXT: buffer_store_byte v57, off, s[16:19], 0 offset:57 -; GCN-O0-NEXT: buffer_store_byte v56, off, s[16:19], 0 offset:56 -; GCN-O0-NEXT: buffer_store_byte v55, off, s[16:19], 0 offset:55 -; GCN-O0-NEXT: buffer_store_byte v54, off, s[16:19], 0 offset:54 -; GCN-O0-NEXT: buffer_store_byte v53, off, s[16:19], 0 offset:53 -; GCN-O0-NEXT: buffer_store_byte v52, off, s[16:19], 0 offset:52 -; GCN-O0-NEXT: buffer_store_byte v51, off, s[16:19], 0 offset:51 -; GCN-O0-NEXT: buffer_store_byte v50, off, s[16:19], 0 offset:50 -; GCN-O0-NEXT: buffer_store_byte v49, off, s[16:19], 0 offset:49 -; GCN-O0-NEXT: buffer_store_byte v48, off, s[16:19], 0 offset:48 -; GCN-O0-NEXT: buffer_store_byte v47, off, s[16:19], 0 offset:47 -; GCN-O0-NEXT: buffer_store_byte v46, off, s[16:19], 0 offset:46 -; GCN-O0-NEXT: buffer_store_byte v45, off, s[16:19], 0 offset:45 -; GCN-O0-NEXT: buffer_store_byte v44, off, s[16:19], 0 offset:44 -; GCN-O0-NEXT: buffer_store_byte v43, off, s[16:19], 0 offset:43 -; GCN-O0-NEXT: buffer_store_byte v42, off, s[16:19], 0 offset:42 -; GCN-O0-NEXT: buffer_store_byte v41, off, s[16:19], 0 offset:41 -; GCN-O0-NEXT: buffer_store_byte v40, off, s[16:19], 0 offset:40 -; GCN-O0-NEXT: buffer_store_byte v39, off, s[16:19], 0 offset:39 -; GCN-O0-NEXT: buffer_store_byte v38, off, s[16:19], 0 offset:38 -; GCN-O0-NEXT: buffer_store_byte v37, off, s[16:19], 0 offset:37 -; GCN-O0-NEXT: buffer_store_byte v36, off, s[16:19], 0 offset:36 -; GCN-O0-NEXT: buffer_store_byte v35, off, s[16:19], 0 offset:35 -; GCN-O0-NEXT: buffer_store_byte v34, off, s[16:19], 0 offset:34 -; GCN-O0-NEXT: buffer_store_byte v33, off, s[16:19], 0 offset:33 -; GCN-O0-NEXT: buffer_store_byte v32, off, s[16:19], 0 offset:32 -; GCN-O0-NEXT: buffer_store_byte v31, off, s[16:19], 0 offset:31 -; GCN-O0-NEXT: buffer_store_byte v30, off, s[16:19], 0 offset:30 -; GCN-O0-NEXT: buffer_store_byte v29, off, s[16:19], 0 offset:29 -; GCN-O0-NEXT: buffer_store_byte v28, off, s[16:19], 0 offset:28 -; GCN-O0-NEXT: buffer_store_byte v27, off, s[16:19], 0 offset:27 -; GCN-O0-NEXT: buffer_store_byte v26, off, s[16:19], 0 offset:26 -; GCN-O0-NEXT: buffer_store_byte v25, off, s[16:19], 0 offset:25 -; GCN-O0-NEXT: buffer_store_byte v24, off, s[16:19], 0 offset:24 -; GCN-O0-NEXT: buffer_store_byte v23, off, s[16:19], 0 offset:23 -; GCN-O0-NEXT: buffer_store_byte v22, off, s[16:19], 0 offset:22 -; GCN-O0-NEXT: buffer_store_byte v21, off, s[16:19], 0 offset:21 -; GCN-O0-NEXT: buffer_store_byte v20, off, s[16:19], 0 offset:20 -; GCN-O0-NEXT: buffer_store_byte v19, off, s[16:19], 0 offset:19 -; GCN-O0-NEXT: buffer_store_byte v18, off, s[16:19], 0 offset:18 -; GCN-O0-NEXT: buffer_store_byte v17, off, s[16:19], 0 offset:17 -; GCN-O0-NEXT: buffer_store_byte v16, off, s[16:19], 0 offset:16 -; GCN-O0-NEXT: buffer_store_byte v15, off, s[16:19], 0 offset:15 -; GCN-O0-NEXT: buffer_store_byte v14, off, s[16:19], 0 offset:14 -; GCN-O0-NEXT: buffer_store_byte v13, off, s[16:19], 0 offset:13 -; GCN-O0-NEXT: buffer_store_byte v12, off, s[16:19], 0 offset:12 -; GCN-O0-NEXT: buffer_store_byte v11, off, s[16:19], 0 offset:11 -; GCN-O0-NEXT: buffer_store_byte v10, off, s[16:19], 0 offset:10 -; GCN-O0-NEXT: buffer_store_byte v9, off, s[16:19], 0 offset:9 -; GCN-O0-NEXT: buffer_store_byte v8, off, s[16:19], 0 offset:8 -; GCN-O0-NEXT: buffer_store_byte v7, off, s[16:19], 0 offset:7 -; GCN-O0-NEXT: buffer_store_byte v6, off, s[16:19], 0 offset:6 -; GCN-O0-NEXT: buffer_store_byte v5, off, s[16:19], 0 offset:5 -; GCN-O0-NEXT: buffer_store_byte v4, off, s[16:19], 0 offset:4 -; GCN-O0-NEXT: buffer_store_byte v3, off, s[16:19], 0 offset:3 -; GCN-O0-NEXT: buffer_store_byte v2, off, s[16:19], 0 offset:2 -; GCN-O0-NEXT: buffer_store_byte v1, off, s[16:19], 0 offset:1 -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GCN-O0-NEXT: v_mov_b32_e32 v3, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s2 -; GCN-O0-NEXT: buffer_store_byte v3, v0, s[16:19], 0 offen -; GCN-O0-NEXT: buffer_load_ubyte v18, off, s[16:19], 0 offset:23 -; GCN-O0-NEXT: buffer_load_ubyte v19, off, s[16:19], 0 offset:22 -; GCN-O0-NEXT: buffer_load_ubyte v20, off, s[16:19], 0 offset:21 -; GCN-O0-NEXT: buffer_load_ubyte v21, off, s[16:19], 0 offset:20 -; GCN-O0-NEXT: buffer_load_ubyte v22, off, s[16:19], 0 offset:19 -; GCN-O0-NEXT: buffer_load_ubyte v23, off, s[16:19], 0 offset:18 -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:128 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v8, off, s[16:19], 0 offset:1 -; GCN-O0-NEXT: buffer_load_ubyte v7, off, s[16:19], 0 offset:2 -; GCN-O0-NEXT: buffer_load_ubyte v6, off, s[16:19], 0 offset:3 -; GCN-O0-NEXT: buffer_load_ubyte v5, off, s[16:19], 0 offset:4 -; GCN-O0-NEXT: buffer_load_ubyte v4, off, s[16:19], 0 offset:5 -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:6 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:140 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:7 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:136 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v9, off, s[16:19], 0 offset:8 -; GCN-O0-NEXT: buffer_load_ubyte v16, off, s[16:19], 0 offset:9 -; GCN-O0-NEXT: buffer_load_ubyte v15, off, s[16:19], 0 offset:10 -; GCN-O0-NEXT: buffer_load_ubyte v14, off, s[16:19], 0 offset:11 -; GCN-O0-NEXT: buffer_load_ubyte v13, off, s[16:19], 0 offset:12 -; GCN-O0-NEXT: buffer_load_ubyte v12, off, s[16:19], 0 offset:13 -; GCN-O0-NEXT: buffer_load_ubyte v11, off, s[16:19], 0 offset:14 -; GCN-O0-NEXT: buffer_load_ubyte v10, off, s[16:19], 0 offset:15 -; GCN-O0-NEXT: buffer_load_ubyte v17, off, s[16:19], 0 offset:16 -; GCN-O0-NEXT: buffer_load_ubyte v24, off, s[16:19], 0 offset:17 -; GCN-O0-NEXT: buffer_load_ubyte v26, off, s[16:19], 0 offset:31 -; GCN-O0-NEXT: buffer_load_ubyte v27, off, s[16:19], 0 offset:30 -; GCN-O0-NEXT: buffer_load_ubyte v28, off, s[16:19], 0 offset:29 -; GCN-O0-NEXT: buffer_load_ubyte v29, off, s[16:19], 0 offset:28 -; GCN-O0-NEXT: buffer_load_ubyte v30, off, s[16:19], 0 offset:27 -; GCN-O0-NEXT: buffer_load_ubyte v31, off, s[16:19], 0 offset:26 -; GCN-O0-NEXT: buffer_load_ubyte v32, off, s[16:19], 0 offset:25 -; GCN-O0-NEXT: buffer_load_ubyte v25, off, s[16:19], 0 offset:24 -; GCN-O0-NEXT: buffer_load_ubyte v34, off, s[16:19], 0 offset:39 -; GCN-O0-NEXT: buffer_load_ubyte v35, off, s[16:19], 0 offset:38 -; GCN-O0-NEXT: buffer_load_ubyte v36, off, s[16:19], 0 offset:37 -; GCN-O0-NEXT: buffer_load_ubyte v37, off, s[16:19], 0 offset:36 -; GCN-O0-NEXT: buffer_load_ubyte v38, off, s[16:19], 0 offset:35 -; GCN-O0-NEXT: buffer_load_ubyte v39, off, s[16:19], 0 offset:34 -; GCN-O0-NEXT: buffer_load_ubyte v40, off, s[16:19], 0 offset:33 -; GCN-O0-NEXT: buffer_load_ubyte v33, off, s[16:19], 0 offset:32 -; GCN-O0-NEXT: buffer_load_ubyte v42, off, s[16:19], 0 offset:47 -; GCN-O0-NEXT: buffer_load_ubyte v43, off, s[16:19], 0 offset:46 -; GCN-O0-NEXT: buffer_load_ubyte v44, off, s[16:19], 0 offset:45 -; GCN-O0-NEXT: buffer_load_ubyte v45, off, s[16:19], 0 offset:44 -; GCN-O0-NEXT: buffer_load_ubyte v46, off, s[16:19], 0 offset:43 -; GCN-O0-NEXT: buffer_load_ubyte v47, off, s[16:19], 0 offset:42 -; GCN-O0-NEXT: buffer_load_ubyte v48, off, s[16:19], 0 offset:41 -; GCN-O0-NEXT: buffer_load_ubyte v41, off, s[16:19], 0 offset:40 -; GCN-O0-NEXT: buffer_load_ubyte v50, off, s[16:19], 0 offset:55 -; GCN-O0-NEXT: buffer_load_ubyte v51, off, s[16:19], 0 offset:54 -; GCN-O0-NEXT: buffer_load_ubyte v52, off, s[16:19], 0 offset:53 -; GCN-O0-NEXT: buffer_load_ubyte v53, off, s[16:19], 0 offset:52 -; GCN-O0-NEXT: buffer_load_ubyte v54, off, s[16:19], 0 offset:51 -; GCN-O0-NEXT: buffer_load_ubyte v55, off, s[16:19], 0 offset:50 -; GCN-O0-NEXT: buffer_load_ubyte v56, off, s[16:19], 0 offset:49 -; GCN-O0-NEXT: buffer_load_ubyte v49, off, s[16:19], 0 offset:48 -; GCN-O0-NEXT: buffer_load_ubyte v58, off, s[16:19], 0 offset:63 -; GCN-O0-NEXT: buffer_load_ubyte v59, off, s[16:19], 0 offset:62 -; GCN-O0-NEXT: buffer_load_ubyte v60, off, s[16:19], 0 offset:61 -; GCN-O0-NEXT: buffer_load_ubyte v61, off, s[16:19], 0 offset:60 -; GCN-O0-NEXT: buffer_load_ubyte v62, off, s[16:19], 0 offset:59 -; GCN-O0-NEXT: buffer_load_ubyte v63, off, s[16:19], 0 offset:58 -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:57 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:132 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v57, off, s[16:19], 0 offset:56 -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:71 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:144 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:70 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:172 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:69 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:148 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:68 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:152 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:67 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:156 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:66 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:160 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:65 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:168 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:64 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:164 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:79 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:176 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:78 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:77 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:180 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:76 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:184 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:75 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:188 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:74 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:192 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:73 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:72 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:196 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:87 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:86 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:236 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:85 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:212 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:84 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:216 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:83 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:220 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:82 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:224 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:81 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:232 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:80 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:228 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:95 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:240 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:94 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:268 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:93 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:244 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:92 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:248 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:91 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:252 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:90 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:256 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:89 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:264 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:88 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:260 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:103 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:272 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:102 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:300 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:101 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:276 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:100 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:280 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:99 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:284 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:98 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:288 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:97 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:296 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:96 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:292 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:111 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:304 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:110 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:332 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:109 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:308 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:108 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:312 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:107 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:316 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:106 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:320 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:105 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:328 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:104 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:324 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:119 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:336 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:118 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:364 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:117 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:340 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:116 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:344 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:115 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:348 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:114 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:352 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:113 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:360 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:112 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:356 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:127 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:368 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v2, off, s[16:19], 0 offset:126 -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:125 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:372 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:124 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:376 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:123 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:380 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:122 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:384 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_load_ubyte v1, off, s[16:19], 0 offset:121 -; GCN-O0-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 offset:120 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:384 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: s_mov_b32 s7, 2 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:380 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: s_mov_b32 s6, 3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:376 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: s_mov_b32 s5, 4 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:372 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: s_mov_b32 s4, 5 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:368 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: s_mov_b32 s3, 6 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_mov_b32 s2, 7 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 15 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:364 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:360 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:356 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:352 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:348 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:344 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:340 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:336 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 14 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:332 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:328 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:324 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:320 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:316 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:312 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:308 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:304 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 13 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:300 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:296 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:292 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:288 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:284 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:280 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:276 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:272 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 12 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:268 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:264 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:260 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:256 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:252 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:248 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:244 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:240 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 11 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:236 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:232 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:228 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:224 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:220 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:216 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:212 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:208 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 10 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:204 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:200 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:196 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:192 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:188 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:184 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:180 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:176 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 9 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:172 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:168 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:164 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, v3, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:160 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s7, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:156 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s6, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:152 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s5, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:148 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v1, v1, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s4, v1 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 8 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:140 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:136 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:132 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v57, v57, v3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v0, v3, v0 -; GCN-O0-NEXT: v_or_b32_e64 v57, v57, v0 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:128 ; 4-byte Folded Reload -; GCN-O0-NEXT: v_and_b32_e64 v63, v63, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v63, s7, v63 -; GCN-O0-NEXT: v_or_b32_e64 v57, v57, v63 -; GCN-O0-NEXT: v_and_b32_e64 v62, v62, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v62, s6, v62 -; GCN-O0-NEXT: v_or_b32_e64 v57, v57, v62 -; GCN-O0-NEXT: v_and_b32_e64 v61, v61, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v61, s5, v61 -; GCN-O0-NEXT: v_or_b32_e64 v57, v57, v61 -; GCN-O0-NEXT: v_and_b32_e64 v60, v60, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v60, s4, v60 -; GCN-O0-NEXT: v_or_b32_e64 v57, v57, v60 -; GCN-O0-NEXT: v_and_b32_e64 v59, v59, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v59, s3, v59 -; GCN-O0-NEXT: v_or_b32_e64 v57, v57, v59 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v58, s2, v58 -; GCN-O0-NEXT: v_or_b32_e64 v59, v57, v58 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 7 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v58, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v57, s8 -; GCN-O0-NEXT: flat_store_byte v[57:58], v59 -; GCN-O0-NEXT: v_and_b32_e64 v49, v49, v3 -; GCN-O0-NEXT: v_and_b32_e64 v56, v56, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v56, v3, v56 -; GCN-O0-NEXT: v_or_b32_e64 v49, v49, v56 -; GCN-O0-NEXT: v_and_b32_e64 v55, v55, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v55, s7, v55 -; GCN-O0-NEXT: v_or_b32_e64 v49, v49, v55 -; GCN-O0-NEXT: v_and_b32_e64 v54, v54, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v54, s6, v54 -; GCN-O0-NEXT: v_or_b32_e64 v49, v49, v54 -; GCN-O0-NEXT: v_and_b32_e64 v53, v53, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v53, s5, v53 -; GCN-O0-NEXT: v_or_b32_e64 v49, v49, v53 -; GCN-O0-NEXT: v_and_b32_e64 v52, v52, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v52, s4, v52 -; GCN-O0-NEXT: v_or_b32_e64 v49, v49, v52 -; GCN-O0-NEXT: v_and_b32_e64 v51, v51, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v51, s3, v51 -; GCN-O0-NEXT: v_or_b32_e64 v49, v49, v51 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v50, s2, v50 -; GCN-O0-NEXT: v_or_b32_e64 v51, v49, v50 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 6 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v50, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v49, s8 -; GCN-O0-NEXT: flat_store_byte v[49:50], v51 -; GCN-O0-NEXT: v_and_b32_e64 v41, v41, v3 -; GCN-O0-NEXT: v_and_b32_e64 v48, v48, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v48, v3, v48 -; GCN-O0-NEXT: v_or_b32_e64 v41, v41, v48 -; GCN-O0-NEXT: v_and_b32_e64 v47, v47, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v47, s7, v47 -; GCN-O0-NEXT: v_or_b32_e64 v41, v41, v47 -; GCN-O0-NEXT: v_and_b32_e64 v46, v46, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v46, s6, v46 -; GCN-O0-NEXT: v_or_b32_e64 v41, v41, v46 -; GCN-O0-NEXT: v_and_b32_e64 v45, v45, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v45, s5, v45 -; GCN-O0-NEXT: v_or_b32_e64 v41, v41, v45 -; GCN-O0-NEXT: v_and_b32_e64 v44, v44, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v44, s4, v44 -; GCN-O0-NEXT: v_or_b32_e64 v41, v41, v44 -; GCN-O0-NEXT: v_and_b32_e64 v43, v43, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v43, s3, v43 -; GCN-O0-NEXT: v_or_b32_e64 v41, v41, v43 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v42, s2, v42 -; GCN-O0-NEXT: v_or_b32_e64 v43, v41, v42 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 5 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v42, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v41, s8 -; GCN-O0-NEXT: flat_store_byte v[41:42], v43 -; GCN-O0-NEXT: v_and_b32_e64 v33, v33, v3 -; GCN-O0-NEXT: v_and_b32_e64 v40, v40, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v40, v3, v40 -; GCN-O0-NEXT: v_or_b32_e64 v33, v33, v40 -; GCN-O0-NEXT: v_and_b32_e64 v39, v39, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v39, s7, v39 -; GCN-O0-NEXT: v_or_b32_e64 v33, v33, v39 -; GCN-O0-NEXT: v_and_b32_e64 v38, v38, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v38, s6, v38 -; GCN-O0-NEXT: v_or_b32_e64 v33, v33, v38 -; GCN-O0-NEXT: v_and_b32_e64 v37, v37, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v37, s5, v37 -; GCN-O0-NEXT: v_or_b32_e64 v33, v33, v37 -; GCN-O0-NEXT: v_and_b32_e64 v36, v36, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v36, s4, v36 -; GCN-O0-NEXT: v_or_b32_e64 v33, v33, v36 -; GCN-O0-NEXT: v_and_b32_e64 v35, v35, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v35, s3, v35 -; GCN-O0-NEXT: v_or_b32_e64 v33, v33, v35 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v34, s2, v34 -; GCN-O0-NEXT: v_or_b32_e64 v35, v33, v34 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 4 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v34, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v33, s8 -; GCN-O0-NEXT: flat_store_byte v[33:34], v35 -; GCN-O0-NEXT: v_and_b32_e64 v25, v25, v3 -; GCN-O0-NEXT: v_and_b32_e64 v32, v32, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v32, v3, v32 -; GCN-O0-NEXT: v_or_b32_e64 v25, v25, v32 -; GCN-O0-NEXT: v_and_b32_e64 v31, v31, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v31, s7, v31 -; GCN-O0-NEXT: v_or_b32_e64 v25, v25, v31 -; GCN-O0-NEXT: v_and_b32_e64 v30, v30, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v30, s6, v30 -; GCN-O0-NEXT: v_or_b32_e64 v25, v25, v30 -; GCN-O0-NEXT: v_and_b32_e64 v29, v29, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v29, s5, v29 -; GCN-O0-NEXT: v_or_b32_e64 v25, v25, v29 -; GCN-O0-NEXT: v_and_b32_e64 v28, v28, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v28, s4, v28 -; GCN-O0-NEXT: v_or_b32_e64 v25, v25, v28 -; GCN-O0-NEXT: v_and_b32_e64 v27, v27, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v27, s3, v27 -; GCN-O0-NEXT: v_or_b32_e64 v25, v25, v27 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v26, s2, v26 -; GCN-O0-NEXT: v_or_b32_e64 v27, v25, v26 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 3 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v26, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v25, s8 -; GCN-O0-NEXT: flat_store_byte v[25:26], v27 -; GCN-O0-NEXT: v_and_b32_e64 v17, v17, v3 -; GCN-O0-NEXT: v_and_b32_e64 v24, v24, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v24, v3, v24 -; GCN-O0-NEXT: v_or_b32_e64 v17, v17, v24 -; GCN-O0-NEXT: v_and_b32_e64 v23, v23, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v23, s7, v23 -; GCN-O0-NEXT: v_or_b32_e64 v17, v17, v23 -; GCN-O0-NEXT: v_and_b32_e64 v22, v22, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v22, s6, v22 -; GCN-O0-NEXT: v_or_b32_e64 v17, v17, v22 -; GCN-O0-NEXT: v_and_b32_e64 v21, v21, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v21, s5, v21 -; GCN-O0-NEXT: v_or_b32_e64 v17, v17, v21 -; GCN-O0-NEXT: v_and_b32_e64 v20, v20, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v20, s4, v20 -; GCN-O0-NEXT: v_or_b32_e64 v17, v17, v20 -; GCN-O0-NEXT: v_and_b32_e64 v19, v19, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v19, s3, v19 -; GCN-O0-NEXT: v_or_b32_e64 v17, v17, v19 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v18, s2, v18 -; GCN-O0-NEXT: v_or_b32_e64 v19, v17, v18 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 2 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v18, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v17, s8 -; GCN-O0-NEXT: flat_store_byte v[17:18], v19 -; GCN-O0-NEXT: v_and_b32_e64 v9, v9, v3 -; GCN-O0-NEXT: v_and_b32_e64 v16, v16, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v16, v3, v16 -; GCN-O0-NEXT: v_or_b32_e64 v9, v9, v16 -; GCN-O0-NEXT: v_and_b32_e64 v15, v15, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v15, s7, v15 -; GCN-O0-NEXT: v_or_b32_e64 v9, v9, v15 -; GCN-O0-NEXT: v_and_b32_e64 v14, v14, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v14, s6, v14 -; GCN-O0-NEXT: v_or_b32_e64 v9, v9, v14 -; GCN-O0-NEXT: v_and_b32_e64 v13, v13, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v13, s5, v13 -; GCN-O0-NEXT: v_or_b32_e64 v9, v9, v13 -; GCN-O0-NEXT: v_and_b32_e64 v12, v12, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v12, s4, v12 -; GCN-O0-NEXT: v_or_b32_e64 v9, v9, v12 -; GCN-O0-NEXT: v_and_b32_e64 v11, v11, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v11, s3, v11 -; GCN-O0-NEXT: v_or_b32_e64 v9, v9, v11 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v10, s2, v10 -; GCN-O0-NEXT: v_or_b32_e64 v11, v9, v10 -; GCN-O0-NEXT: s_mov_b64 s[12:13], 1 -; GCN-O0-NEXT: s_mov_b32 s8, s0 -; GCN-O0-NEXT: s_mov_b32 s9, s1 -; GCN-O0-NEXT: s_mov_b32 s11, s12 -; GCN-O0-NEXT: s_mov_b32 s10, s13 -; GCN-O0-NEXT: s_add_u32 s8, s8, s11 -; GCN-O0-NEXT: s_addc_u32 s10, s9, s10 -; GCN-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s9, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s8 -; GCN-O0-NEXT: flat_store_byte v[9:10], v11 -; GCN-O0-NEXT: s_waitcnt vmcnt(7) -; GCN-O0-NEXT: v_and_b32_e64 v0, v0, v3 -; GCN-O0-NEXT: v_and_b32_e64 v8, v8, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v8, v3, v8 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v8 -; GCN-O0-NEXT: v_and_b32_e64 v7, v7, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v7, s7, v7 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v7 -; GCN-O0-NEXT: v_and_b32_e64 v6, v6, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v6, s6, v6 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v6 -; GCN-O0-NEXT: v_and_b32_e64 v5, v5, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v5, s5, v5 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v5 -; GCN-O0-NEXT: v_and_b32_e64 v4, v4, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v4, s4, v4 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v4 -; GCN-O0-NEXT: v_and_b32_e64 v2, v2, v3 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v2, s3, v2 -; GCN-O0-NEXT: v_or_b32_e64 v0, v0, v2 -; GCN-O0-NEXT: v_lshlrev_b16_e64 v1, s2, v1 -; GCN-O0-NEXT: v_or_b32_e64 v2, v0, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s0 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 -; GCN-O0-NEXT: flat_store_byte v[0:1], v2 -; GCN-O0-NEXT: s_endpgm entry: %v = insertelement <128 x i1> %vec, i1 1, i32 %sel store <128 x i1> %v, ptr addrspace(1) %out @@ -5682,361 +1910,6 @@ define amdgpu_ps <32 x float> @float32_inselt_vec(<32 x float> %vec, i32 %sel) { ; GCN-NEXT: v_cndmask_b32_e64 v30, 1.0, v30, s[58:59] ; GCN-NEXT: v_cndmask_b32_e64 v31, 1.0, v31, s[60:61] ; GCN-NEXT: ; return to shader part epilog -; -; GCN-O0-LABEL: float32_inselt_vec: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GCN-O0-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GCN-O0-NEXT: s_mov_b32 s10, -1 -; GCN-O0-NEXT: s_mov_b32 s11, 0xe80000 -; GCN-O0-NEXT: s_add_u32 s8, s8, s0 -; GCN-O0-NEXT: s_addc_u32 s9, s9, 0 -; GCN-O0-NEXT: buffer_store_dword v32, off, s[8:11], 0 offset:264 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_mov_b32_e32 v32, v31 -; GCN-O0-NEXT: v_mov_b32_e32 v33, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v34, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v35, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v36, v27 -; GCN-O0-NEXT: v_mov_b32_e32 v37, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v38, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v39, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v40, v23 -; GCN-O0-NEXT: v_mov_b32_e32 v41, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v42, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v43, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v44, v19 -; GCN-O0-NEXT: v_mov_b32_e32 v45, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v46, v17 -; GCN-O0-NEXT: v_mov_b32_e32 v47, v16 -; GCN-O0-NEXT: v_mov_b32_e32 v48, v15 -; GCN-O0-NEXT: v_mov_b32_e32 v49, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v50, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v51, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v52, v11 -; GCN-O0-NEXT: v_mov_b32_e32 v53, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v54, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v55, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v56, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v57, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v58, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v59, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v60, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v61, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v62, v1 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v62 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v61 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v60 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v59 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v58 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v57 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v56 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v55 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v54 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v53 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v52 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v51 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v50 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v49 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v48 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v38 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v37 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v35 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v32 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:136 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:140 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:144 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:148 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:152 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:156 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:160 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:164 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:168 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:172 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:176 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:180 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:184 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:188 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:192 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:196 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:200 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:204 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:208 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:212 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[8:11], 0 offset:216 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[8:11], 0 offset:220 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[8:11], 0 offset:224 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[8:11], 0 offset:228 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[8:11], 0 offset:232 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[8:11], 0 offset:236 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[8:11], 0 offset:240 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[8:11], 0 offset:244 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[8:11], 0 offset:248 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[8:11], 0 offset:252 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[8:11], 0 offset:256 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[8:11], 0 offset:260 ; 4-byte Folded Spill -; GCN-O0-NEXT: v_mov_b32_e32 v32, 1.0 -; GCN-O0-NEXT: buffer_store_dword v32, off, s[8:11], 0 offset:132 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr64 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v64, s0, 0 -; GCN-O0-NEXT: v_writelane_b32 v64, s1, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v64, off, s[8:11], 0 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:72 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:76 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:80 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[8:11], 0 offset:84 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[8:11], 0 offset:88 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[8:11], 0 offset:92 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[8:11], 0 offset:96 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[8:11], 0 offset:100 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[8:11], 0 offset:104 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[8:11], 0 offset:108 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[8:11], 0 offset:112 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[8:11], 0 offset:116 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[8:11], 0 offset:120 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[8:11], 0 offset:124 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[8:11], 0 offset:128 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr0_sgpr1 -; GCN-O0-NEXT: .LBB22_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v64, off, s[8:11], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v64, 2 -; GCN-O0-NEXT: v_readlane_b32 s1, v64, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[8:11], 0 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[8:11], 0 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[8:11], 0 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[8:11], 0 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[8:11], 0 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[8:11], 0 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[8:11], 0 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[8:11], 0 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[8:11], 0 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[8:11], 0 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[8:11], 0 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[8:11], 0 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[8:11], 0 offset:68 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[8:11], 0 offset:72 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v18, off, s[8:11], 0 offset:76 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v19, off, s[8:11], 0 offset:80 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v20, off, s[8:11], 0 offset:84 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v21, off, s[8:11], 0 offset:88 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v22, off, s[8:11], 0 offset:92 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v23, off, s[8:11], 0 offset:96 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v24, off, s[8:11], 0 offset:100 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v25, off, s[8:11], 0 offset:104 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v26, off, s[8:11], 0 offset:108 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v27, off, s[8:11], 0 offset:112 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v28, off, s[8:11], 0 offset:116 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v29, off, s[8:11], 0 offset:120 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v30, off, s[8:11], 0 offset:124 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v31, off, s[8:11], 0 offset:128 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v32, off, s[8:11], 0 offset:132 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v33, off, s[8:11], 0 offset:264 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s2, v33 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v33 -; GCN-O0-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GCN-O0-NEXT: s_mov_b32 m0, s2 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v32 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:268 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:272 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:276 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:280 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:284 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:288 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:292 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:296 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:300 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:304 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:308 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:312 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:316 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:320 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:328 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:332 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:336 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:340 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:344 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[8:11], 0 offset:348 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[8:11], 0 offset:352 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[8:11], 0 offset:356 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[8:11], 0 offset:360 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[8:11], 0 offset:364 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[8:11], 0 offset:368 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[8:11], 0 offset:372 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[8:11], 0 offset:376 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[8:11], 0 offset:380 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[8:11], 0 offset:384 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[8:11], 0 offset:388 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[8:11], 0 offset:392 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:72 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:76 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:80 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v20, off, s[8:11], 0 offset:84 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v21, off, s[8:11], 0 offset:88 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v22, off, s[8:11], 0 offset:92 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v23, off, s[8:11], 0 offset:96 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v24, off, s[8:11], 0 offset:100 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v25, off, s[8:11], 0 offset:104 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v26, off, s[8:11], 0 offset:108 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v27, off, s[8:11], 0 offset:112 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v28, off, s[8:11], 0 offset:116 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v29, off, s[8:11], 0 offset:120 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v30, off, s[8:11], 0 offset:124 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v31, off, s[8:11], 0 offset:128 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[2:3], s[0:1] -; GCN-O0-NEXT: v_writelane_b32 v64, s2, 2 -; GCN-O0-NEXT: v_writelane_b32 v64, s3, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v64, off, s[8:11], 0 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: s_cbranch_execnz .LBB22_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v64, off, s[8:11], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v64, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v64, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v31, off, s[8:11], 0 offset:268 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v32, off, s[8:11], 0 offset:272 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v33, off, s[8:11], 0 offset:276 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v34, off, s[8:11], 0 offset:280 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v35, off, s[8:11], 0 offset:284 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v36, off, s[8:11], 0 offset:288 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v37, off, s[8:11], 0 offset:292 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v38, off, s[8:11], 0 offset:296 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v39, off, s[8:11], 0 offset:300 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v40, off, s[8:11], 0 offset:304 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v41, off, s[8:11], 0 offset:308 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v42, off, s[8:11], 0 offset:312 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v43, off, s[8:11], 0 offset:316 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v44, off, s[8:11], 0 offset:320 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v45, off, s[8:11], 0 offset:324 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v46, off, s[8:11], 0 offset:328 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v47, off, s[8:11], 0 offset:332 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v48, off, s[8:11], 0 offset:336 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v49, off, s[8:11], 0 offset:340 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v50, off, s[8:11], 0 offset:344 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v51, off, s[8:11], 0 offset:348 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v52, off, s[8:11], 0 offset:352 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v53, off, s[8:11], 0 offset:356 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v54, off, s[8:11], 0 offset:360 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v55, off, s[8:11], 0 offset:364 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v56, off, s[8:11], 0 offset:368 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v57, off, s[8:11], 0 offset:372 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v58, off, s[8:11], 0 offset:376 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v59, off, s[8:11], 0 offset:380 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v60, off, s[8:11], 0 offset:384 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v61, off, s[8:11], 0 offset:388 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v62, off, s[8:11], 0 offset:392 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v31 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v32 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v33 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v34 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v35 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v36 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v37 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v38 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v39 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v40 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v41 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v42 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v43 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v44 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v45 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v46 -; GCN-O0-NEXT: v_mov_b32_e32 v16, v47 -; GCN-O0-NEXT: v_mov_b32_e32 v17, v48 -; GCN-O0-NEXT: s_waitcnt vmcnt(13) -; GCN-O0-NEXT: v_mov_b32_e32 v18, v49 -; GCN-O0-NEXT: s_waitcnt vmcnt(12) -; GCN-O0-NEXT: v_mov_b32_e32 v19, v50 -; GCN-O0-NEXT: s_waitcnt vmcnt(11) -; GCN-O0-NEXT: v_mov_b32_e32 v20, v51 -; GCN-O0-NEXT: s_waitcnt vmcnt(10) -; GCN-O0-NEXT: v_mov_b32_e32 v21, v52 -; GCN-O0-NEXT: s_waitcnt vmcnt(9) -; GCN-O0-NEXT: v_mov_b32_e32 v22, v53 -; GCN-O0-NEXT: s_waitcnt vmcnt(8) -; GCN-O0-NEXT: v_mov_b32_e32 v23, v54 -; GCN-O0-NEXT: s_waitcnt vmcnt(7) -; GCN-O0-NEXT: v_mov_b32_e32 v24, v55 -; GCN-O0-NEXT: s_waitcnt vmcnt(6) -; GCN-O0-NEXT: v_mov_b32_e32 v25, v56 -; GCN-O0-NEXT: s_waitcnt vmcnt(5) -; GCN-O0-NEXT: v_mov_b32_e32 v26, v57 -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v27, v58 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v28, v59 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v29, v60 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v30, v61 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v31, v62 -; GCN-O0-NEXT: ; return to shader part epilog entry: %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel ret <32 x float> %v @@ -6072,1843 +1945,7 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) { ; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc ; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: double8_inselt_vec: -; GCN-O0: ; %bb.0: ; %entry -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: v_mov_b32_e32 v17, v15 -; GCN-O0-NEXT: v_mov_b32_e32 v18, v14 -; GCN-O0-NEXT: v_mov_b32_e32 v19, v13 -; GCN-O0-NEXT: v_mov_b32_e32 v20, v12 -; GCN-O0-NEXT: v_mov_b32_e32 v21, v11 -; GCN-O0-NEXT: v_mov_b32_e32 v22, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v23, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v24, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v25, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v26, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v27, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v28, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v29, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v30, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v31, v1 -; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v1, v31 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v30 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v29 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v28 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v27 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v26 -; GCN-O0-NEXT: v_mov_b32_e32 v7, v25 -; GCN-O0-NEXT: v_mov_b32_e32 v8, v24 -; GCN-O0-NEXT: v_mov_b32_e32 v9, v23 -; GCN-O0-NEXT: v_mov_b32_e32 v10, v22 -; GCN-O0-NEXT: v_mov_b32_e32 v11, v21 -; GCN-O0-NEXT: v_mov_b32_e32 v12, v20 -; GCN-O0-NEXT: v_mov_b32_e32 v13, v19 -; GCN-O0-NEXT: v_mov_b32_e32 v14, v18 -; GCN-O0-NEXT: v_mov_b32_e32 v15, v17 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, 1 -; GCN-O0-NEXT: v_lshlrev_b32_e64 v16, s4, v16 -; GCN-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], 1.0 -; GCN-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v33, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v33, s5, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s4 -; GCN-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v33, s4, 2 -; GCN-O0-NEXT: v_writelane_b32 v33, s5, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB23_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v33, 4 -; GCN-O0-NEXT: v_readlane_b32 s5, v33, 5 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v17 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v17 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v16 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v33, s6, 4 -; GCN-O0-NEXT: v_writelane_b32 v33, s7, 5 -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB23_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v33, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v33, 3 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v33, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v33, 1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b32 s4, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v16, s4 -; GCN-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v33, s4, 6 -; GCN-O0-NEXT: v_writelane_b32 v33, s5, 7 -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB23_4: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v33, 8 -; GCN-O0-NEXT: v_readlane_b32 s5, v33, 9 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v17 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v17 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v1, v16 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v33, s6, 8 -; GCN-O0-NEXT: v_writelane_b32 v33, s7, 9 -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB23_4 -; GCN-O0-NEXT: ; %bb.5: -; GCN-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[10:11] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v33, 6 -; GCN-O0-NEXT: v_readlane_b32 s5, v33, 7 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.6: -; GCN-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(14) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v15 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v16 -; GCN-O0-NEXT: s_waitcnt vmcnt(13) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v17 -; GCN-O0-NEXT: s_waitcnt vmcnt(12) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v18 -; GCN-O0-NEXT: s_waitcnt vmcnt(11) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v19 -; GCN-O0-NEXT: s_waitcnt vmcnt(10) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v20 -; GCN-O0-NEXT: s_waitcnt vmcnt(9) -; GCN-O0-NEXT: v_mov_b32_e32 v6, v21 -; GCN-O0-NEXT: s_waitcnt vmcnt(8) -; GCN-O0-NEXT: v_mov_b32_e32 v7, v22 -; GCN-O0-NEXT: s_waitcnt vmcnt(7) -; GCN-O0-NEXT: v_mov_b32_e32 v8, v23 -; GCN-O0-NEXT: s_waitcnt vmcnt(6) -; GCN-O0-NEXT: v_mov_b32_e32 v9, v24 -; GCN-O0-NEXT: s_waitcnt vmcnt(5) -; GCN-O0-NEXT: v_mov_b32_e32 v10, v25 -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v11, v26 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v12, v27 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v13, v28 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v14, v29 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v15, v30 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] entry: %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel ret <8 x double> %v } - -define <3 x i32> @insert_dyn_i32_3(<3 x i32> inreg %arg, i32 %idx, i32 %val) { -; GCN-LABEL: insert_dyn_i32_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v4 -; GCN-NEXT: v_mov_b32_e32 v1, v3 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_i32_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v5, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v5, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB24_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v4 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v4 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v3 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v5, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v5, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB24_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v4 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <3 x i32> %arg, i32 %val, i32 %idx - ret <3 x i32> %x -} - -define <3 x i32> @insert_dyn_inreg_i32_3(<3 x i32> inreg %arg, i32 inreg %idx, i32 %val) { -; GCN-LABEL: insert_dyn_inreg_i32_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s19, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s19, 1 -; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s19, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_i32_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s6 -; GCN-O0-NEXT: s_mov_b32 m0, s19 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v4 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <3 x i32> %arg, i32 %val, i32 %idx - ret <3 x i32> %x -} - -define <3 x float> @insert_dyn_float_3(<3 x float> inreg %arg, i32 %idx, float %val) { -; GCN-LABEL: insert_dyn_float_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v4 -; GCN-NEXT: v_mov_b32_e32 v1, v3 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_float_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v5, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v5, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB26_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v4 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v4 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v3 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v5, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v5, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB26_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v5, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v5, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v2 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v4 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <3 x float> %arg, float %val, i32 %idx - ret <3 x float> %x -} - -define <3 x float> @insert_dyn_inreg_float_3(<3 x float> inreg %arg, i32 inreg %idx, float %val) { -; GCN-LABEL: insert_dyn_inreg_float_3: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s19, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s19, 1 -; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s19, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_float_3: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10 killed $sgpr4_sgpr5_sgpr6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s6 -; GCN-O0-NEXT: s_mov_b32 m0, s19 -; GCN-O0-NEXT: v_movreld_b32_e32 v2, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v3 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v4 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <3 x float> %arg, float %val, i32 %idx - ret <3 x float> %x -} - -define <5 x i32> @insert_dyn_i32_5(<5 x i32> inreg %arg, i32 %idx, i32 %val) { -; GCN-LABEL: insert_dyn_i32_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v6 -; GCN-NEXT: v_mov_b32_e32 v1, v5 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_i32_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr9 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v9, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v9, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v6 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v6 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v5 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v9, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v9, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB28_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v4 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v5 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v7 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v8 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <5 x i32> %arg, i32 %val, i32 %idx - ret <5 x i32> %x -} - -define <5 x i32> @insert_dyn_inreg_i32_5(<5 x i32> inreg %arg, i32 inreg %idx, i32 %val) { -; GCN-LABEL: insert_dyn_inreg_i32_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s21, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 1 -; GCN-NEXT: v_cndmask_b32_e32 v5, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 3 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 4 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v5 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_i32_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s8 -; GCN-O0-NEXT: s_mov_b32 m0, s21 -; GCN-O0-NEXT: v_movreld_b32_e32 v4, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v8 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <5 x i32> %arg, i32 %val, i32 %idx - ret <5 x i32> %x -} - -define <5 x float> @insert_dyn_float_5(<5 x float> inreg %arg, i32 %idx, float %val) { -; GCN-LABEL: insert_dyn_float_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v6 -; GCN-NEXT: v_mov_b32_e32 v1, v5 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_float_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr9 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v9, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v9, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB30_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v6 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v6 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v5 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v9, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v9, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB30_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v9, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v9, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v4 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v5 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v7 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v8 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <5 x float> %arg, float %val, i32 %idx - ret <5 x float> %x -} - -define <5 x float> @insert_dyn_inreg_float_5(<5 x float> inreg %arg, i32 inreg %idx, float %val) { -; GCN-LABEL: insert_dyn_inreg_float_5: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s21, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 1 -; GCN-NEXT: v_cndmask_b32_e32 v5, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 3 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s21, 4 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v5 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_float_5: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s8 -; GCN-O0-NEXT: s_mov_b32 m0, s21 -; GCN-O0-NEXT: v_movreld_b32_e32 v4, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v8 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <5 x float> %arg, float %val, i32 %idx - ret <5 x float> %x -} - -define <6 x i32> @insert_dyn_i32_6(<6 x i32> inreg %arg, i32 %idx, i32 %val) { -; GCN-LABEL: insert_dyn_i32_6: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v7, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v6 -; GCN-NEXT: v_mov_b32_e32 v1, v7 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_i32_6: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v11, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v11, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB32_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v11, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v11, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v7 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v6 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v11, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v11, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB32_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v11, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v11, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(5) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v5 -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v8 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v9 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v10 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <6 x i32> %arg, i32 %val, i32 %idx - ret <6 x i32> %x -} - -define <6 x i32> @insert_dyn_inreg_i32_6(<6 x i32> inreg %arg, i32 inreg %idx, i32 %val) { -; GCN-LABEL: insert_dyn_inreg_i32_6: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s22, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 1 -; GCN-NEXT: v_cndmask_b32_e32 v6, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 3 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 4 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 5 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v6 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_i32_6: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s4 -; GCN-O0-NEXT: s_mov_b32 m0, s22 -; GCN-O0-NEXT: v_movreld_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v10 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <6 x i32> %arg, i32 %val, i32 %idx - ret <6 x i32> %x -} - -define <6 x float> @insert_dyn_float_6(<6 x float> inreg %arg, i32 %idx, float %val) { -; GCN-LABEL: insert_dyn_float_6: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v7, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v6 -; GCN-NEXT: v_mov_b32_e32 v1, v7 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_float_6: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v11, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v11, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB34_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v11, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v11, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v7 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v7 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v6 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v11, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v11, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB34_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[22:23] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v11, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v11, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(5) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v5 -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v8 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v9 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v10 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <6 x float> %arg, float %val, i32 %idx - ret <6 x float> %x -} - -define <6 x float> @insert_dyn_inreg_float_6(<6 x float> inreg %arg, i32 inreg %idx, float %val) { -; GCN-LABEL: insert_dyn_inreg_float_6: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s22, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 1 -; GCN-NEXT: v_cndmask_b32_e32 v6, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 3 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 4 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s22, 5 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v6 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_float_6: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s4 -; GCN-O0-NEXT: s_mov_b32 m0, s22 -; GCN-O0-NEXT: v_movreld_b32_e32 v5, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v5 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v10 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <6 x float> %arg, float %val, i32 %idx - ret <6 x float> %x -} - -define <7 x i32> @insert_dyn_i32_7(<7 x i32> inreg %arg, i32 %idx, i32 %val) { -; GCN-LABEL: insert_dyn_i32_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v8, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v7, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v6, s22 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v8 -; GCN-NEXT: v_mov_b32_e32 v1, v7 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_i32_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v13, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v13, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB36_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v13, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v13, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v8 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v8 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v7 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v13, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v13, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB36_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v13, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v13, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(6) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(5) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v7 -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v8 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v9 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v10 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v11 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <7 x i32> %arg, i32 %val, i32 %idx - ret <7 x i32> %x -} - -define <7 x i32> @insert_dyn_inreg_i32_7(<7 x i32> inreg %arg, i32 inreg %idx, i32 %val) { -; GCN-LABEL: insert_dyn_inreg_i32_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s23, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 1 -; GCN-NEXT: v_cndmask_b32_e32 v7, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 3 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 4 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 5 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 6 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v6, s22 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v7 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_i32_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s4 -; GCN-O0-NEXT: s_mov_b32 m0, s23 -; GCN-O0-NEXT: v_movreld_b32_e32 v6, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v11 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <7 x i32> %arg, i32 %val, i32 %idx - ret <7 x i32> %x -} - -define <7 x float> @insert_dyn_float_7(<7 x float> inreg %arg, i32 %idx, float %val) { -; GCN-LABEL: insert_dyn_float_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v2, s16 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v8, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s17 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GCN-NEXT: v_cndmask_b32_e32 v7, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v6, s22 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v8 -; GCN-NEXT: v_mov_b32_e32 v1, v7 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_float_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v5, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s10 -; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_writelane_b32 v13, s4, 0 -; GCN-O0-NEXT: v_writelane_b32 v13, s5, 1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-O0-NEXT: .LBB38_1: ; =>This Inner Loop Header: Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v13, 2 -; GCN-O0-NEXT: v_readlane_b32 s5, v13, 3 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readfirstlane_b32 s6, v8 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v8 -; GCN-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GCN-O0-NEXT: s_mov_b32 m0, s6 -; GCN-O0-NEXT: v_movreld_b32_e32 v0, v7 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v13, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v13, s7, 3 -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_cbranch_execnz .LBB38_1 -; GCN-O0-NEXT: ; %bb.2: -; GCN-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[26:27] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v13, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v13, 1 -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; %bb.3: -; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(6) -; GCN-O0-NEXT: v_mov_b32_e32 v0, v6 -; GCN-O0-NEXT: s_waitcnt vmcnt(5) -; GCN-O0-NEXT: v_mov_b32_e32 v1, v7 -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_mov_b32_e32 v2, v8 -; GCN-O0-NEXT: s_waitcnt vmcnt(3) -; GCN-O0-NEXT: v_mov_b32_e32 v3, v9 -; GCN-O0-NEXT: s_waitcnt vmcnt(2) -; GCN-O0-NEXT: v_mov_b32_e32 v4, v10 -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v11 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <7 x float> %arg, float %val, i32 %idx - ret <7 x float> %x -} - -define <7 x float> @insert_dyn_inreg_float_7(<7 x float> inreg %arg, i32 inreg %idx, float %val) { -; GCN-LABEL: insert_dyn_inreg_float_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_cmp_eq_u32 s23, 0 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 1 -; GCN-NEXT: v_cndmask_b32_e32 v7, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 2 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v2, s18 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 3 -; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v3, s19 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 4 -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v4, s20 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 5 -; GCN-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s21 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s23, 6 -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v6, s22 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc -; GCN-NEXT: v_mov_b32_e32 v0, v7 -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; GCN-O0-LABEL: insert_dyn_inreg_float_7: -; GCN-O0: ; %bb.0: -; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: s_mov_b32 s4, s16 -; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: s_mov_b32 s5, s17 -; GCN-O0-NEXT: s_mov_b32 s6, s18 -; GCN-O0-NEXT: s_mov_b32 s7, s19 -; GCN-O0-NEXT: s_mov_b32 s8, s20 -; GCN-O0-NEXT: s_mov_b32 s9, s21 -; GCN-O0-NEXT: s_mov_b32 s10, s22 -; GCN-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18 killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 -; GCN-O0-NEXT: v_mov_b32_e32 v12, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v11, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v10, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v9, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v8, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v7, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s4 -; GCN-O0-NEXT: s_mov_b32 m0, s23 -; GCN-O0-NEXT: v_movreld_b32_e32 v6, v0 -; GCN-O0-NEXT: v_mov_b32_e32 v0, v6 -; GCN-O0-NEXT: v_mov_b32_e32 v1, v7 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v8 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v9 -; GCN-O0-NEXT: v_mov_b32_e32 v4, v10 -; GCN-O0-NEXT: v_mov_b32_e32 v5, v11 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v12 -; GCN-O0-NEXT: s_setpc_b64 s[30:31] - %x = insertelement <7 x float> %arg, float %val, i32 %idx - ret <7 x float> %x -}