From 9ac2de51dce5e7549dd54922881c23879821fa07 Mon Sep 17 00:00:00 2001 From: Craig Topper <craig.topper@sifive.com> Date: Wed, 14 Aug 2024 10:58:55 -0700 Subject: [PATCH 1/8] [RISCV] Add isel optimization for (add x, (and (sra y, c2), c1)) to recover regression from #101751. If c1 is a shifted mask with c3 leading zeros and c4 trailing zeros. If c2 is greater than c3, we can use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as the X amount. I'll also improve the non-Zba case in a follow up patch. --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 27 +++++++++++++++++++++ llvm/test/CodeGen/RISCV/rv64zba.ll | 23 ++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 35681c620eed5..bcc719737b405 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3019,6 +3019,33 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, return true; } } + } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() && + isa<ConstantSDNode>(N.getOperand(1))) { + uint64_t Mask = N.getConstantOperandVal(1); + unsigned C2 = N0.getConstantOperandVal(1); + + // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 + // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can + // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as + // the X ammount. 
+ if (isShiftedMask_64(Mask)) { + unsigned XLen = Subtarget->getXLen(); + unsigned Leading = XLen - llvm::bit_width(Mask); + unsigned Trailing = llvm::countr_zero(Mask); + if (C2 > Leading && Trailing == ShAmt) { + SDLoc DL(N); + EVT VT = N.getValueType(); + Val = SDValue(CurDAG->getMachineNode( + RISCV::SRAI, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(C2 - Leading, DL, VT)), + 0); + Val = SDValue(CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, Val, + CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)), + 0); + return true; + } + } } } else if (bool LeftShift = N.getOpcode() == ISD::SHL; (LeftShift || N.getOpcode() == ISD::SRL) && diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 87796e2c7b72e..b1b4707265d7f 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -2988,3 +2988,26 @@ entry: %2 = and i64 %1, 34359738360 ret i64 %2 } + +define ptr @srai_srli_sh3add(ptr %0, i64 %1) nounwind { +; RV64I-LABEL: srai_srli_sh3add: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: srai a1, a1, 35 +; RV64I-NEXT: li a2, -57 +; RV64I-NEXT: srli a2, a2, 3 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: srai_srli_sh3add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: srai a1, a1, 32 +; RV64ZBA-NEXT: srli a1, a1, 6 +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: ret +entry: + %2 = ashr i64 %1, 32 + %3 = lshr i64 %2, 6 + %4 = getelementptr i64, ptr %0, i64 %3 + ret ptr %4 +} From 02e834569609139f648a316762006168adbde31d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2024 11:38:55 -0700 Subject: [PATCH 2/8] fixup! 
fix typo in comment --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index bcc719737b405..30b53148cefc8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3027,7 +3027,7 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as - // the X ammount. + // the X amount. if (isShiftedMask_64(Mask)) { unsigned XLen = Subtarget->getXLen(); unsigned Leading = XLen - llvm::bit_width(Mask); From 76711df9155261da3f528896c57915c536e325da Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2024 11:38:26 -0700 Subject: [PATCH 3/8] fixup! Add non-Zba case too. --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 25 +++++++++++++++++++-- llvm/test/CodeGen/RISCV/rv64zba.ll | 22 ++++++++++++++---- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 30b53148cefc8..1ee095fbf0e45 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1451,8 +1451,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { const uint64_t C1 = N1C->getZExtValue(); - // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a mask - // with c3 leading zeros and c2 is larger than c3. 
if (N0.getOpcode() == ISD::SRA && isa(N0.getOperand(1)) && N0.hasOneUse()) { unsigned C2 = N0.getConstantOperandVal(1); @@ -1466,6 +1464,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { X.getOpcode() == ISD::SHL && isa(X.getOperand(1)) && X.getConstantOperandVal(1) == 32; + // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a + // mask with c3 leading zeros and c2 is larger than c3. if (isMask_64(C1) && !Skip) { unsigned Leading = XLen - llvm::bit_width(C1); if (C2 > Leading) { @@ -1479,6 +1479,27 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { return; } } + + // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 + // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can + // use (slli (srli (srai y, c2 - c3), c3 + c4), c4). + if (isShiftedMask_64(C1)) { + unsigned Leading = XLen - llvm::bit_width(C1); + unsigned Trailing = llvm::countr_zero(C1); + if (C2 > Leading && Trailing > 0) { + SDNode *SRAI = CurDAG->getMachineNode( + RISCV::SRAI, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(C2 - Leading, DL, VT)); + SDNode *SRLI = CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, SDValue(SRAI, 0), + CurDAG->getTargetConstant(Leading + Trailing, DL, VT)); + SDNode *SLLI = CurDAG->getMachineNode( + RISCV::SLLI, DL, VT, SDValue(SRLI, 0), + CurDAG->getTargetConstant(Trailing, DL, VT)); + ReplaceNode(Node, SLLI); + return; + } + } } // If C1 masks off the upper bits only (but can't be formed as an diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index b1b4707265d7f..c589b5ccb53d6 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -2992,10 +2992,9 @@ entry: define ptr @srai_srli_sh3add(ptr %0, i64 %1) nounwind { ; RV64I-LABEL: srai_srli_sh3add: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: srai a1, a1, 35 -; RV64I-NEXT: li a2, -57 -; RV64I-NEXT: srli a2, a2, 3 -; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srai a1, a1, 32 +; RV64I-NEXT: srli a1, a1, 
6 +; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; @@ -3011,3 +3010,18 @@ entry: %4 = getelementptr i64, ptr %0, i64 %3 ret ptr %4 } + +define ptr @srai_srli_slli(ptr %0, i64 %1) nounwind { +; CHECK-LABEL: srai_srli_slli: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srai a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 6 +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ret +entry: + %2 = ashr i64 %1, 32 + %3 = lshr i64 %2, 6 + %4 = getelementptr i128, ptr %0, i64 %3 + ret ptr %4 +} From 3962d509066e0f45d8d81a4eeae9c100cfa89425 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 16 Aug 2024 10:42:40 -0700 Subject: [PATCH 4/8] fixup! address review comment --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 1ee095fbf0e45..d91ae42d9439e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1483,7 +1483,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can // use (slli (srli (srai y, c2 - c3), c3 + c4), c4). - if (isShiftedMask_64(C1)) { + if (isShiftedMask_64(C1) && !Skip) { unsigned Leading = XLen - llvm::bit_width(C1); unsigned Trailing = llvm::countr_zero(C1); if (C2 > Leading && Trailing > 0) { From b52288596620dcc6a338118c2ab148b3990c83c7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Aug 2024 10:07:48 -0700 Subject: [PATCH 5/8] fixup! Ensure there are leading zeros. 
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index d91ae42d9439e..7fcbfa377be80 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1486,7 +1486,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (isShiftedMask_64(C1) && !Skip) { unsigned Leading = XLen - llvm::bit_width(C1); unsigned Trailing = llvm::countr_zero(C1); - if (C2 > Leading && Trailing > 0) { + if (C2 > Leading && Leading > 0 && Trailing > 0) { SDNode *SRAI = CurDAG->getMachineNode( RISCV::SRAI, DL, VT, N0.getOperand(0), CurDAG->getTargetConstant(C2 - Leading, DL, VT)); From abe4728ba43a5c9ebfda69a596dee485d1c5983a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Aug 2024 10:15:45 -0700 Subject: [PATCH 6/8] fixup! add tests --- llvm/test/CodeGen/RISCV/rv64zba.ll | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index c589b5ccb53d6..065192c6ea6c1 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -3025,3 +3025,32 @@ entry: %4 = getelementptr i128, ptr %0, i64 %3 ret ptr %4 } + +; Negative to make sure the peephole added for srai_srli_slli and +; srai_srli_sh3add doesn't break this. +define i64 @srai_andi(i64 %x) nounwind { +; CHECK-LABEL: srai_andi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srai a0, a0, 8 +; CHECK-NEXT: andi a0, a0, -8 +; CHECK-NEXT: ret +entry: + %y = ashr i64 %x, 8 + %z = and i64 %y, -8 + ret i64 %z +} + +; Negative to make sure the peephole added for srai_srli_slli and +; srai_srli_sh3add doesn't break this. 
+define i64 @srai_lui_and(i64 %x) nounwind { +; CHECK-LABEL: srai_and: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srai a0, a0, 8 +; CHECK-NEXT: lui a1, 1048574 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: ret +entry: + %y = ashr i64 %x, 8 + %z = and i64 %y, -8192 + ret i64 %z +} From 56ab9cdace1cc79ecf845d4251ba9a38126ac64f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Aug 2024 16:16:16 -0700 Subject: [PATCH 7/8] fixup! fix CHECK line. --- llvm/test/CodeGen/RISCV/rv64zba.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 065192c6ea6c1..62595fd4a7ad6 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -3043,7 +3043,7 @@ entry: ; Negative to make sure the peephole added for srai_srli_slli and ; srai_srli_sh3add doesn't break this. define i64 @srai_lui_and(i64 %x) nounwind { -; CHECK-LABEL: srai_and: +; CHECK-LABEL: srai_lui_and: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: srai a0, a0, 8 ; CHECK-NEXT: lui a1, 1048574 From e96b8f326295cbec17bd493b7a98c3c056cce60f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Aug 2024 22:31:30 -0700 Subject: [PATCH 8/8] fixup! Add Leading > 0 check to selectSHXADDOp too. 
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 7fcbfa377be80..e5076e1275187 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3053,7 +3053,7 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, unsigned XLen = Subtarget->getXLen(); unsigned Leading = XLen - llvm::bit_width(Mask); unsigned Trailing = llvm::countr_zero(Mask); - if (C2 > Leading && Trailing == ShAmt) { + if (C2 > Leading && Leading > 0 && Trailing == ShAmt) { SDLoc DL(N); EVT VT = N.getValueType(); Val = SDValue(CurDAG->getMachineNode(