From 55851e06c82a5f2050ff3894ee7f05dee259ae84 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Mon, 18 Sep 2023 16:57:23 +0000 Subject: [PATCH 1/3] [AArch64][SVE2] Do not emit RSHRNB for large shifts rshrnb's shift amount operand must be between 1-EltSizeInBits. This patch stops RSHRNB ISD nodes being emitted in this case --- .../Target/AArch64/AArch64ISelLowering.cpp | 3 ++ .../AArch64/sve2-intrinsics-combine-rshrnb.ll | 46 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5cc001c44e7a2..4ef97e682b7bf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20241,6 +20241,9 @@ static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, return SDValue(); unsigned ShiftValue = SrlOp1->getZExtValue(); + if (ShiftValue > ResVT.getScalarSizeInBits()) + return SDValue(); + SDValue Add = Srl->getOperand(0); if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse()) return SDValue(); diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll index f94daa45fb82a..fe86a94e30357 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll @@ -142,6 +142,52 @@ define void @wide_add_shift_add_rshrnb_h(ptr %dest, i64 %index, %arg1){ +; CHECK-LABEL: wide_add_shift_add_rshrnb_d: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: rshrnb z1.s, z1.d, #32 +; CHECK-NEXT: rshrnb z0.s, z0.d, #32 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = add %arg1, shufflevector ( insertelement ( poison, i64 2147483648, i64 0), poison, zeroinitializer) + %2 = lshr %1, shufflevector ( 
insertelement ( poison, i64 32, i64 0), poison, zeroinitializer) + %3 = getelementptr inbounds i32, ptr %dest, i64 %index + %load = load , ptr %3, align 4 + %4 = trunc %2 to + %5 = add %load, %4 + store %5, ptr %3, align 4 + ret void +} + +; Do not emit rshrnb if the shift amount is larger than the dest eltsize in bits +define void @neg_wide_add_shift_add_rshrnb_d(ptr %dest, i64 %index, %arg1){ +; CHECK-LABEL: neg_wide_add_shift_add_rshrnb_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, #0x800000000000 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add z0.d, z0.d, z2.d +; CHECK-NEXT: add z1.d, z1.d, z2.d +; CHECK-NEXT: lsr z1.d, z1.d, #48 +; CHECK-NEXT: lsr z0.d, z0.d, #48 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = add %arg1, shufflevector ( insertelement ( poison, i64 140737488355328, i64 0), poison, zeroinitializer) + %2 = lshr %1, shufflevector ( insertelement ( poison, i64 48, i64 0), poison, zeroinitializer) + %3 = getelementptr inbounds i32, ptr %dest, i64 %index + %load = load , ptr %3, align 4 + %4 = trunc %2 to + %5 = add %load, %4 + store %5, ptr %3, align 4 + ret void +} + define void @neg_trunc_lsr_add_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, %add_op1){ ; CHECK-LABEL: neg_trunc_lsr_add_op1_not_splat: ; CHECK: // %bb.0: From 3573db31217c1b86c62d6c135745a781312cae28 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Wed, 20 Sep 2023 12:05:59 +0000 Subject: [PATCH 2/3] [AArch64][SVE2] Do not emit RSHRNB for large shifts rshrnb's shift amount operand must be between 1-EltSizeInBits. 
This patch stops RSHRNB ISD nodes being emitted in this case --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++-- .../AArch64/sve2-intrinsics-combine-rshrnb.ll | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ef97e682b7bf..5340896dd7a30 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20241,8 +20241,9 @@ static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, return SDValue(); unsigned ShiftValue = SrlOp1->getZExtValue(); - if (ShiftValue > ResVT.getScalarSizeInBits()) - return SDValue(); + uint64_t EltSize = ResVT.getScalarSizeInBits(); + if (ShiftValue > EltSize) + ShiftValue = EltSize; SDValue Add = Srl->getOperand(0); if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse()) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll index fe86a94e30357..a913177623df9 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-combine-rshrnb.ll @@ -100,6 +100,23 @@ define void @neg_add_lshr_rshrnb_h_0(ptr %ptr, ptr %dst, i64 %index){ ret void } +define void @neg_zero_shift(ptr %ptr, ptr %dst, i64 %index){ +; CHECK-LABEL: neg_zero_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: add z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: st1h { z0.s }, p0, [x1, x2, lsl #1] +; CHECK-NEXT: ret + %load = load , ptr %ptr, align 2 + %1 = add %load, trunc ( shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) to ) + %2 = lshr %1, trunc ( shufflevector ( insertelement ( poison, i64 0, i64 0), poison, zeroinitializer) to ) + %3 = trunc %2 to + %4 = getelementptr inbounds i16, ptr %dst, i64 %index + store %3, ptr %4, align 1 + ret void +} 
+ define void @wide_add_shift_add_rshrnb_b(ptr %dest, i64 %index, %arg1){ ; CHECK-LABEL: wide_add_shift_add_rshrnb_b: ; CHECK: // %bb.0: From 3a8af1b5f51fb26bf1ec47e82b3f7aa46cfef048 Mon Sep 17 00:00:00 2001 From: Matt Devereau Date: Wed, 20 Sep 2023 15:50:39 +0000 Subject: [PATCH 3/3] [AArch64][SVE2] Do not emit RSHRNB for large shifts rshrnb's shift amount operand must be in the range [1, EltSizeInBits]. This patch stops RSHRNB ISD nodes from being emitted when the shift amount is outside that range. --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 5340896dd7a30..d3eb82a8c5559 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -20240,10 +20240,8 @@ static SDValue trySimplifySrlAddToRshrnb(SDValue Srl, SelectionDAG &DAG, if (!SrlOp1) return SDValue(); unsigned ShiftValue = SrlOp1->getZExtValue(); - - uint64_t EltSize = ResVT.getScalarSizeInBits(); - if (ShiftValue > EltSize) - ShiftValue = EltSize; + if (ShiftValue < 1 || ShiftValue > ResVT.getScalarSizeInBits()) + return SDValue(); SDValue Add = Srl->getOperand(0); if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse())