|
3 | 3 | ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8,GFX678,ALL
|
4 | 4 | ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,GFX1011,ALL
|
5 | 5 | ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX1011,ALL
|
| 6 | +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,ALL |
6 | 7 |
|
7 | 8 | ; ALL-LABEL: {{^}}build_vector2:
|
8 | 9 | ; R600: MOV
|
@@ -96,3 +97,99 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
|
96 | 97 | store <2 x i16> %ins.1, ptr addrspace(1) %out
|
97 | 98 | ret void
|
98 | 99 | }
|
| 100 | + |
| 101 | +; R600-LABEL: build_v2i32_from_v4i16_shuffle: |
| 102 | +; R600: ; %bb.0: ; %entry |
| 103 | +; R600-NEXT: ALU 0, @10, KC0[], KC1[] |
| 104 | +; R600-NEXT: TEX 1 @6 |
| 105 | +; R600-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] |
| 106 | +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 |
| 107 | +; R600-NEXT: CF_END |
| 108 | +; R600-NEXT: PAD |
| 109 | +; R600-NEXT: Fetch clause starting at 6: |
| 110 | +; R600-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3 |
| 111 | +; R600-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 |
| 112 | +; R600-NEXT: ALU clause starting at 10: |
| 113 | +; R600-NEXT: MOV * T0.X, 0.0, |
| 114 | +; R600-NEXT: ALU clause starting at 11: |
| 115 | +; R600-NEXT: LSHL * T0.Y, T1.X, literal.x, |
| 116 | +; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00) |
| 117 | +; R600-NEXT: LSHL T0.X, T0.X, literal.x, |
| 118 | +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, |
| 119 | +; R600-NEXT: 16(2.242078e-44), 2(2.802597e-45) |
| 120 | +; |
| 121 | +; GFX6-LABEL: build_v2i32_from_v4i16_shuffle: |
| 122 | +; GFX6: ; %bb.0: ; %entry |
| 123 | +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 |
| 124 | +; GFX6-NEXT: s_mov_b32 s7, 0xf000 |
| 125 | +; GFX6-NEXT: s_waitcnt lgkmcnt(0) |
| 126 | +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 |
| 127 | +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 |
| 128 | +; GFX6-NEXT: s_mov_b32 s6, -1 |
| 129 | +; GFX6-NEXT: s_mov_b32 s4, s0 |
| 130 | +; GFX6-NEXT: s_mov_b32 s5, s1 |
| 131 | +; GFX6-NEXT: v_mov_b32_e32 v0, s2 |
| 132 | +; GFX6-NEXT: v_mov_b32_e32 v1, s3 |
| 133 | +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| 134 | +; GFX6-NEXT: s_endpgm |
| 135 | +; |
| 136 | +; GFX8-LABEL: build_v2i32_from_v4i16_shuffle: |
| 137 | +; GFX8: ; %bb.0: ; %entry |
| 138 | +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| 139 | +; GFX8-NEXT: s_mov_b32 s7, 0xf000 |
| 140 | +; GFX8-NEXT: s_mov_b32 s6, -1 |
| 141 | +; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| 142 | +; GFX8-NEXT: s_mov_b32 s4, s0 |
| 143 | +; GFX8-NEXT: s_mov_b32 s5, s1 |
| 144 | +; GFX8-NEXT: s_lshl_b32 s0, s3, 16 |
| 145 | +; GFX8-NEXT: s_lshl_b32 s1, s2, 16 |
| 146 | +; GFX8-NEXT: v_mov_b32_e32 v0, s1 |
| 147 | +; GFX8-NEXT: v_mov_b32_e32 v1, s0 |
| 148 | +; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| 149 | +; GFX8-NEXT: s_endpgm |
| 150 | +; |
| 151 | +; GFX10-LABEL: build_v2i32_from_v4i16_shuffle: |
| 152 | +; GFX10: ; %bb.0: ; %entry |
| 153 | +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 |
| 154 | +; GFX10-NEXT: v_mov_b32_e32 v2, 0 |
| 155 | +; GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| 156 | +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 |
| 157 | +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 |
| 158 | +; GFX10-NEXT: v_mov_b32_e32 v0, s2 |
| 159 | +; GFX10-NEXT: v_mov_b32_e32 v1, s3 |
| 160 | +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] |
| 161 | +; GFX10-NEXT: s_endpgm |
| 162 | +; |
| 163 | +; GFX11-LABEL: build_v2i32_from_v4i16_shuffle: |
| 164 | +; GFX11: ; %bb.0: ; %entry |
| 165 | +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 |
| 166 | +; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| 167 | +; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| 168 | +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 |
| 169 | +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 |
| 170 | +; GFX11-NEXT: v_mov_b32_e32 v0, s2 |
| 171 | +; GFX11-NEXT: v_mov_b32_e32 v1, s3 |
| 172 | +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] |
| 173 | +; GFX11-NEXT: s_nop 0 |
| 174 | +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 175 | +; GFX11-NEXT: s_endpgm |
| 176 | +; |
| 177 | +; GFX940-LABEL: build_v2i32_from_v4i16_shuffle: |
| 178 | +; GFX940: ; %bb.0: ; %entry |
| 179 | +; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 |
| 180 | +; GFX940-NEXT: v_mov_b32_e32 v2, 0 |
| 181 | +; GFX940-NEXT: s_waitcnt lgkmcnt(0) |
| 182 | +; GFX940-NEXT: s_lshl_b32 s3, s3, 16 |
| 183 | +; GFX940-NEXT: s_lshl_b32 s2, s2, 16 |
| 184 | +; GFX940-NEXT: v_mov_b32_e32 v0, s2 |
| 185 | +; GFX940-NEXT: v_mov_b32_e32 v1, s3 |
| 186 | +; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 |
| 187 | +; GFX940-NEXT: s_endpgm |
| | +; Kernel under test: picks elements 0 and 2 of the <4 x i16> argument,
| | +; zero-extends each to i32, shifts each left by 16, and stores the
| | +; resulting <2 x i32> through %out.
| 188 | +define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) {
| 189 | +entry:
| | +; Mask <0, 2> only references the first operand (%in); the zeroinitializer
| | +; second operand is never selected.
| 190 | + %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
| | +; Widen both selected i16 lanes to i32 with zero extension.
| 191 | + %zextended = zext <2 x i16> %shuf to <2 x i32>
| | +; Shift both lanes left by 16 bits.
| 192 | + %shifted = shl <2 x i32> %zextended, <i32 16, i32 16>
| | +; Write the two i32 lanes to the addrspace(1) pointer argument.
| 193 | + store <2 x i32> %shifted, ptr addrspace(1) %out
| 194 | + ret void
| 195 | +}
0 commit comments