Skip to content

Commit

Permalink
[AMDGPU] In VectorLegalizer::Expand, if UnrollVectorOp returns Load, … (
Browse files Browse the repository at this point in the history
#88475)

…return only Load since other output is chain.

Added testcase that showed mismatched expected arity when Load and chain
were returned as separate items after
003b58f
  • Loading branch information
choikwa authored Apr 16, 2024
1 parent ca4cf97 commit 422bf13
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 2 deletions.
10 changes: 8 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,8 +1159,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
}

SDValue Unrolled = DAG.UnrollVectorOp(Node);
for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
Results.push_back(Unrolled.getValue(I));
if (Node->getNumValues() == 1) {
Results.push_back(Unrolled);
} else {
assert(Node->getNumValues() == Unrolled->getNumValues() &&
"VectorLegalizer Expand returned wrong number of results!");
for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
Results.push_back(Unrolled.getValue(I));
}
}

SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
Expand Down
97 changes: 97 additions & 0 deletions llvm/test/CodeGen/AMDGPU/build_vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8,GFX678,ALL
; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,GFX1011,ALL
; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX1011,ALL
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,ALL

; ALL-LABEL: {{^}}build_vector2:
; R600: MOV
Expand Down Expand Up @@ -96,3 +97,99 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
store <2 x i16> %ins.1, ptr addrspace(1) %out
ret void
}

; R600-LABEL: build_v2i32_from_v4i16_shuffle:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 0, @10, KC0[], KC1[]
; R600-NEXT: TEX 1 @6
; R600-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: Fetch clause starting at 6:
; R600-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3
; R600-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
; R600-NEXT: ALU clause starting at 10:
; R600-NEXT: MOV * T0.X, 0.0,
; R600-NEXT: ALU clause starting at 11:
; R600-NEXT: LSHL * T0.Y, T1.X, literal.x,
; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; R600-NEXT: LSHL T0.X, T0.X, literal.x,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT: 16(2.242078e-44), 2(2.802597e-45)
;
; GFX6-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: v_mov_b32_e32 v0, s2
; GFX6-NEXT: v_mov_b32_e32 v1, s3
; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_mov_b32 s7, 0xf000
; GFX8-NEXT: s_mov_b32 s6, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, s0
; GFX8-NEXT: s_mov_b32 s5, s1
; GFX8-NEXT: s_lshl_b32 s0, s3, 16
; GFX8-NEXT: s_lshl_b32 s1, s2, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_mov_b32_e32 v1, s0
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX8-NEXT: s_endpgm
;
; GFX10-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_lshl_b32 s2, s2, 16
; GFX10-NEXT: s_lshl_b32 s3, s3, 16
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b32 s2, s2, 16
; GFX11-NEXT: s_lshl_b32 s3, s3, 16
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_mov_b32_e32 v1, s3
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX940-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s3, s3, 16
; GFX940-NEXT: s_lshl_b32 s2, s2, 16
; GFX940-NEXT: v_mov_b32_e32 v0, s2
; GFX940-NEXT: v_mov_b32_e32 v1, s3
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_endpgm
define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) {
entry:
%shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
%zextended = zext <2 x i16> %shuf to <2 x i32>
%shifted = shl <2 x i32> %zextended, <i32 16, i32 16>
store <2 x i32> %shifted, ptr addrspace(1) %out
ret void
}

0 comments on commit 422bf13

Please sign in to comment.