Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2089,7 +2089,8 @@ void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
return;

SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
SDValue Chain = Node->getOperand(0);
SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
SDLoc DL(Node);
EVT VT = Node->getValueType(0);

Expand All @@ -2110,14 +2111,15 @@ void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
unsigned NumOutVecs,
unsigned Opc) {

SDValue ZtValue;
SmallVector<SDValue, 4> Ops;
if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
return;

Ops.push_back(ZtValue);
Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
SDValue Chain = Node->getOperand(0);
SDValue Ops[] = {ZtValue,
createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
Chain};

SDLoc DL(Node);
EVT VT = Node->getValueType(0);

Expand Down
13 changes: 8 additions & 5 deletions llvm/test/CodeGen/AArch64/pr161420.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx15.0.0"

; From: https://github.com/llvm/llvm-project/issues/161420. This test checks that
; two `luti4` instructions are emitted. FIXME: This is currently broken!
; two `luti4` instructions are emitted.
define void @pluto(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) #0 {
; CHECK-LABEL: pluto:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: mov w8, #0 ; =0x0
; CHECK-NEXT: ldr zt0, [x1]
; CHECK-NEXT: ldr z0, [x3]
; CHECK-NEXT: ldr z4, [x3]
; CHECK-NEXT: ptrue pn8.h
; CHECK-NEXT: ld1h { z4.h - z7.h }, pn8/z, [x0]
; CHECK-NEXT: luti4 { z0.h - z3.h }, zt0, z0[0]
; CHECK-NEXT: fmla za.h[w8, 2, vgx4], { z4.h - z7.h }, { z0.h - z3.h }
; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0]
; CHECK-NEXT: luti4 { z16.h - z19.h }, zt0, z4[0]
; CHECK-NEXT: fmla za.h[w8, 0, vgx4], { z0.h - z3.h }, { z16.h - z19.h }
; CHECK-NEXT: ldr zt0, [x2]
; CHECK-NEXT: luti4 { z4.h - z7.h }, zt0, z4[0]
; CHECK-NEXT: fmla za.h[w8, 2, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
; CHECK-NEXT: ret
bb:
tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %arg1)
Expand Down
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscal
}

; Tests that multiple identical luti4 intrinsics, with ZT0 loads interspersed, are not CSE'd.
; FIXME: This is currently broken!
define void @test_multiple_luti4_zt_i8(ptr %ptrA, ptr %ptrB, <vscale x 16 x i8> %x) {
; CHECK-LABEL: test_multiple_luti4_zt_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr zt0, [x0]
; CHECK-NEXT: luti4 { z4.s - z7.s }, zt0, z0[1]
; CHECK-NEXT: // fake_use: $z4 $z4_z5_z6_z7
; CHECK-NEXT: ldr zt0, [x1]
; CHECK-NEXT: luti4 { z0.s - z3.s }, zt0, z0[1]
; CHECK-NEXT: // fake_use: $z0 $z0_z1_z2_z3
; CHECK-NEXT: ret
Expand Down
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16
}

; Tests that multiple identical luti4 intrinsics, with ZT0 loads interspersed, are not CSE'd.
; FIXME: This is currently broken!
define void @test_multiple_luti4_zt_i8(ptr %ptrA, ptr %ptrB, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) #0 {
; CHECK-LABEL: test_multiple_luti4_zt_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr zt0, [x0]
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: luti4 { z4.b - z7.b }, zt0, { z0, z1 }
; CHECK-NEXT: // fake_use: $z4 $z4_z5_z6_z7
; CHECK-NEXT: ldr zt0, [x1]
; CHECK-NEXT: luti4 { z0.b - z3.b }, zt0, { z0, z1 }
; CHECK-NEXT: // fake_use: $z0 $z0_z1_z2_z3
; CHECK-NEXT: ret
Expand Down