-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Improve the codegen for sdiv 2 #98324
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Author: Allen (vfdff) Changes: Following PR97879, if X's size is BitWidth, then X sdiv 2 can be expressed as
```
X += X >> (BitWidth - 1)
X = X >> 1
```
Fix #97884 Full diff: https://github.com/llvm/llvm-project/pull/98324.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 458f962802b4c..2b2d70976eb02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6182,18 +6182,31 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, VT);
- APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
- SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+ SDValue CMov;
- // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
- EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
- SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+ if (Lg2 == 1) {
+ // If Divisor is 2, add 1 << (BitWidth -1) to it before shifting right.
+ unsigned BitWidth = VT.getSizeInBits();
+ SDValue SignVal = DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(BitWidth - 1, DL, VT));
+ CMov = DAG.getNode(ISD::ADD, DL, VT, N0, SignVal);
+
+ Created.push_back(SignVal.getNode());
+ Created.push_back(CMov.getNode());
+ } else {
+ APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+ SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
- Created.push_back(Cmp.getNode());
- Created.push_back(Add.getNode());
- Created.push_back(CMov.getNode());
+ // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+ CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+ Created.push_back(Cmp.getNode());
+ Created.push_back(Add.getNode());
+ Created.push_back(CMov.getNode());
+ }
// Divide by pow2.
SDValue SRA =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 3a17a95ed71da..6431cfc58a54d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -202,9 +202,8 @@ define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #32
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: add w8, w0, w0, lsr #31
; CHECK-SD-NEXT: mov w9, wzr
-; CHECK-SD-NEXT: cinc w8, w0, lt
; CHECK-SD-NEXT: asr w8, w8, #1
; CHECK-SD-NEXT: .LBB11_1: // %do.body
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
index 4619534151814..2551be8555ce6 100644
--- a/llvm/test/CodeGen/AArch64/sdivpow2.ll
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -90,8 +90,7 @@ define i64 @test7(i64 %x) {
define i64 @test8(i64 %x) {
; ISEL-LABEL: test8:
; ISEL: // %bb.0:
-; ISEL-NEXT: cmp x0, #0
-; ISEL-NEXT: cinc x8, x0, lt
+; ISEL-NEXT: add x8, x0, x0, lsr #63
; ISEL-NEXT: asr x0, x8, #1
; ISEL-NEXT: ret
;
@@ -110,10 +109,8 @@ define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
; ISEL-LABEL: sdiv_int:
; ISEL: // %bb.0:
; ISEL-NEXT: sub w8, w0, w1
-; ISEL-NEXT: add w9, w8, #1
-; ISEL-NEXT: add w10, w8, #2
-; ISEL-NEXT: cmp w9, #0
-; ISEL-NEXT: csinc w8, w10, w8, lt
+; ISEL-NEXT: add w8, w8, #1
+; ISEL-NEXT: add w8, w8, w8, lsr #31
; ISEL-NEXT: sub w0, w0, w8, asr #1
; ISEL-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
index f7dda82885678..55742e12db6c0 100644
--- a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
@@ -5,177 +5,95 @@
; RUN: | FileCheck -check-prefixes=CHECK,SFB %s
define signext i32 @sdiv2_32(i32 signext %0) {
-; NOSFB-LABEL: sdiv2_32:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srliw a1, a0, 31
-; NOSFB-NEXT: add a0, a0, a1
-; NOSFB-NEXT: sraiw a0, a0, 1
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: sdiv2_32:
-; SFB: # %bb.0:
-; SFB-NEXT: bgez a0, .LBB0_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a0, a0, 1
-; SFB-NEXT: .LBB0_2:
-; SFB-NEXT: sraiw a0, a0, 1
-; SFB-NEXT: ret
+; CHECK-LABEL: sdiv2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sraiw a0, a0, 1
+; CHECK-NEXT: ret
%res = sdiv i32 %0, 2
ret i32 %res
}
define signext i32 @sdivneg2_32(i32 signext %0) {
-; NOSFB-LABEL: sdivneg2_32:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srliw a1, a0, 31
-; NOSFB-NEXT: add a0, a0, a1
-; NOSFB-NEXT: sraiw a0, a0, 1
-; NOSFB-NEXT: neg a0, a0
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: sdivneg2_32:
-; SFB: # %bb.0:
-; SFB-NEXT: bgez a0, .LBB1_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a0, a0, 1
-; SFB-NEXT: .LBB1_2:
-; SFB-NEXT: sraiw a0, a0, 1
-; SFB-NEXT: neg a0, a0
-; SFB-NEXT: ret
+; CHECK-LABEL: sdivneg2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: sraiw a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
%res = sdiv i32 %0, -2
ret i32 %res
}
define i64 @sdiv2_64(i64 %0) {
-; NOSFB-LABEL: sdiv2_64:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srli a1, a0, 63
-; NOSFB-NEXT: add a0, a0, a1
-; NOSFB-NEXT: srai a0, a0, 1
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: sdiv2_64:
-; SFB: # %bb.0:
-; SFB-NEXT: bgez a0, .LBB2_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a0, a0, 1
-; SFB-NEXT: .LBB2_2:
-; SFB-NEXT: srai a0, a0, 1
-; SFB-NEXT: ret
+; CHECK-LABEL: sdiv2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: ret
%res = sdiv i64 %0, 2
ret i64 %res
}
define i64 @sdivneg2_64(i64 %0) {
-; NOSFB-LABEL: sdivneg2_64:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srli a1, a0, 63
-; NOSFB-NEXT: add a0, a0, a1
-; NOSFB-NEXT: srai a0, a0, 1
-; NOSFB-NEXT: neg a0, a0
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: sdivneg2_64:
-; SFB: # %bb.0:
-; SFB-NEXT: bgez a0, .LBB3_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a0, a0, 1
-; SFB-NEXT: .LBB3_2:
-; SFB-NEXT: srai a0, a0, 1
-; SFB-NEXT: neg a0, a0
-; SFB-NEXT: ret
+; CHECK-LABEL: sdivneg2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: srai a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
%res = sdiv i64 %0, -2
ret i64 %res
}
define signext i32 @srem2_32(i32 signext %0) {
-; NOSFB-LABEL: srem2_32:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srliw a1, a0, 31
-; NOSFB-NEXT: add a1, a1, a0
-; NOSFB-NEXT: andi a1, a1, -2
-; NOSFB-NEXT: subw a0, a0, a1
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: srem2_32:
-; SFB: # %bb.0:
-; SFB-NEXT: mv a1, a0
-; SFB-NEXT: bgez a0, .LBB4_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a1, a0, 1
-; SFB-NEXT: .LBB4_2:
-; SFB-NEXT: andi a1, a1, -2
-; SFB-NEXT: subw a0, a0, a1
-; SFB-NEXT: ret
+; CHECK-LABEL: srem2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
%res = srem i32 %0, 2
ret i32 %res
}
define signext i32 @sremneg2_32(i32 signext %0) {
-; NOSFB-LABEL: sremneg2_32:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srliw a1, a0, 31
-; NOSFB-NEXT: add a1, a1, a0
-; NOSFB-NEXT: andi a1, a1, -2
-; NOSFB-NEXT: subw a0, a0, a1
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: sremneg2_32:
-; SFB: # %bb.0:
-; SFB-NEXT: mv a1, a0
-; SFB-NEXT: bgez a0, .LBB5_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a1, a0, 1
-; SFB-NEXT: .LBB5_2:
-; SFB-NEXT: andi a1, a1, -2
-; SFB-NEXT: subw a0, a0, a1
-; SFB-NEXT: ret
+; CHECK-LABEL: sremneg2_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srliw a1, a0, 31
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
%res = srem i32 %0, -2
ret i32 %res
}
define i64 @srem2_64(i64 %0) {
-; NOSFB-LABEL: srem2_64:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srli a1, a0, 63
-; NOSFB-NEXT: add a1, a1, a0
-; NOSFB-NEXT: andi a1, a1, -2
-; NOSFB-NEXT: sub a0, a0, a1
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: srem2_64:
-; SFB: # %bb.0:
-; SFB-NEXT: mv a1, a0
-; SFB-NEXT: bgez a0, .LBB6_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a1, a0, 1
-; SFB-NEXT: .LBB6_2:
-; SFB-NEXT: andi a1, a1, -2
-; SFB-NEXT: sub a0, a0, a1
-; SFB-NEXT: ret
+; CHECK-LABEL: srem2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
%res = srem i64 %0, 2
ret i64 %res
}
define i64 @sremneg2_64(i64 %0) {
-; NOSFB-LABEL: sremneg2_64:
-; NOSFB: # %bb.0:
-; NOSFB-NEXT: srli a1, a0, 63
-; NOSFB-NEXT: add a1, a1, a0
-; NOSFB-NEXT: andi a1, a1, -2
-; NOSFB-NEXT: sub a0, a0, a1
-; NOSFB-NEXT: ret
-;
-; SFB-LABEL: sremneg2_64:
-; SFB: # %bb.0:
-; SFB-NEXT: mv a1, a0
-; SFB-NEXT: bgez a0, .LBB7_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: addi a1, a0, 1
-; SFB-NEXT: .LBB7_2:
-; SFB-NEXT: andi a1, a1, -2
-; SFB-NEXT: sub a0, a0, a1
-; SFB-NEXT: ret
+; CHECK-LABEL: sremneg2_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srli a1, a0, 63
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: andi a1, a1, -2
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: ret
%res = srem i64 %0, -2
ret i64 %res
}
|
; NOSFB-NEXT: sraiw a0, a0, 1 | ||
; NOSFB-NEXT: ret | ||
; | ||
; SFB-LABEL: sdiv2_32: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On SiFive's cores with SFB (Short forward branch), bgez + addi
only takes one cycle.
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); | ||
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0); | ||
if (Lg2 == 1) { | ||
// If Divisor is 2, add 1 << (BitWidth -1) to it before shifting right. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment isn't accurate. It's adding (N0 >> (BitWidth - 1)).
Similar to X86, if X's size is BitWidth, then X sdiv 2 can be expressed as
```
X += X >> (BitWidth - 1)
X = X >> 1
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Same as X86, if X's size is BitWidth, then X sdiv 2 can be expressed as
```
X += X >> (BitWidth - 1)
X = X >> 1
```
Fix llvm#97884
Following PR97879, if X's size is BitWidth, then X sdiv 2 can be expressed as
```
X += X >> (BitWidth - 1)
X = X >> 1
```
Fix #97884