Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64] Improve the codegen for sdiv 2 #98324

Merged
merged 1 commit into from
Jul 12, 2024
Merged

[AArch64] Improve the codegen for sdiv 2 #98324

merged 1 commit into from
Jul 12, 2024

Conversation

vfdff
Copy link
Contributor

@vfdff vfdff commented Jul 10, 2024

Following PR97879: if X's size is BitWidth, then X sdiv 2 can be expressed as follows (the first shift is logical, the second arithmetic):

  X += X >> (BitWidth - 1)
  X = X >> 1

Fix #97884

@llvmbot
Copy link
Member

llvmbot commented Jul 10, 2024

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: Allen (vfdff)

Changes

Following PR97879: if X's size is BitWidth, then X sdiv 2 can be expressed as follows (the first shift is logical, the second arithmetic):

  X += X >> (BitWidth - 1)
  X = X >> 1

Fix #97884


Full diff: https://github.com/llvm/llvm-project/pull/98324.diff

4 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+23-10)
  • (modified) llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll (+1-2)
  • (modified) llvm/test/CodeGen/AArch64/sdivpow2.ll (+3-6)
  • (modified) llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll (+54-136)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 458f962802b4c..2b2d70976eb02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6182,18 +6182,31 @@ SDValue TargetLowering::buildSDIVPow2WithCMov(
   SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
   SDValue Zero = DAG.getConstant(0, DL, VT);
-  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
-  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+  SDValue CMov;
 
-  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
-  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
-  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
-  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
-  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+  if (Lg2 == 1) {
+    // If Divisor is 2, add 1 << (BitWidth -1) to it before shifting right.
+    unsigned BitWidth = VT.getSizeInBits();
+    SDValue SignVal = DAG.getNode(ISD::SRL, DL, VT, N0,
+                                  DAG.getConstant(BitWidth - 1, DL, VT));
+    CMov = DAG.getNode(ISD::ADD, DL, VT, N0, SignVal);
+
+    Created.push_back(SignVal.getNode());
+    Created.push_back(CMov.getNode());
+  } else {
+    APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+    SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
 
-  Created.push_back(Cmp.getNode());
-  Created.push_back(Add.getNode());
-  Created.push_back(CMov.getNode());
+    // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+    EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+    SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+    CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+    Created.push_back(Cmp.getNode());
+    Created.push_back(Add.getNode());
+    Created.push_back(CMov.getNode());
+  }
 
   // Divide by pow2.
   SDValue SRA =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 3a17a95ed71da..6431cfc58a54d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -202,9 +202,8 @@ define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    sub sp, sp, #32
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SD-NEXT:    cmp w0, #0
+; CHECK-SD-NEXT:    add w8, w0, w0, lsr #31
 ; CHECK-SD-NEXT:    mov w9, wzr
-; CHECK-SD-NEXT:    cinc w8, w0, lt
 ; CHECK-SD-NEXT:    asr w8, w8, #1
 ; CHECK-SD-NEXT:  .LBB11_1: // %do.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/sdivpow2.ll b/llvm/test/CodeGen/AArch64/sdivpow2.ll
index 4619534151814..2551be8555ce6 100644
--- a/llvm/test/CodeGen/AArch64/sdivpow2.ll
+++ b/llvm/test/CodeGen/AArch64/sdivpow2.ll
@@ -90,8 +90,7 @@ define i64 @test7(i64 %x) {
 define i64 @test8(i64 %x) {
 ; ISEL-LABEL: test8:
 ; ISEL:       // %bb.0:
-; ISEL-NEXT:    cmp x0, #0
-; ISEL-NEXT:    cinc x8, x0, lt
+; ISEL-NEXT:    add x8, x0, x0, lsr #63
 ; ISEL-NEXT:    asr x0, x8, #1
 ; ISEL-NEXT:    ret
 ;
@@ -110,10 +109,8 @@ define i32 @sdiv_int(i32 %begin, i32 %first) #0 {
 ; ISEL-LABEL: sdiv_int:
 ; ISEL:       // %bb.0:
 ; ISEL-NEXT:    sub w8, w0, w1
-; ISEL-NEXT:    add w9, w8, #1
-; ISEL-NEXT:    add w10, w8, #2
-; ISEL-NEXT:    cmp w9, #0
-; ISEL-NEXT:    csinc w8, w10, w8, lt
+; ISEL-NEXT:    add w8, w8, #1
+; ISEL-NEXT:    add w8, w8, w8, lsr #31
 ; ISEL-NEXT:    sub w0, w0, w8, asr #1
 ; ISEL-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
index f7dda82885678..55742e12db6c0 100644
--- a/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2-cmov.ll
@@ -5,177 +5,95 @@
 ; RUN:   | FileCheck -check-prefixes=CHECK,SFB %s
 
 define signext i32 @sdiv2_32(i32 signext %0) {
-; NOSFB-LABEL: sdiv2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    sraiw a0, a0, 1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdiv2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB0_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB0_2:
-; SFB-NEXT:    sraiw a0, a0, 1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdiv2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    sraiw a0, a0, 1
+; CHECK-NEXT:    ret
   %res = sdiv i32 %0, 2
   ret i32 %res
 }
 
 define signext i32 @sdivneg2_32(i32 signext %0) {
-; NOSFB-LABEL: sdivneg2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    sraiw a0, a0, 1
-; NOSFB-NEXT:    neg a0, a0
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdivneg2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB1_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB1_2:
-; SFB-NEXT:    sraiw a0, a0, 1
-; SFB-NEXT:    neg a0, a0
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdivneg2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    sraiw a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
   %res = sdiv i32 %0, -2
   ret i32 %res
 }
 
 define i64 @sdiv2_64(i64 %0) {
-; NOSFB-LABEL: sdiv2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    srai a0, a0, 1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdiv2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB2_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB2_2:
-; SFB-NEXT:    srai a0, a0, 1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdiv2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    srai a0, a0, 1
+; CHECK-NEXT:    ret
   %res = sdiv i64 %0, 2
   ret i64 %res
 }
 
 define i64 @sdivneg2_64(i64 %0) {
-; NOSFB-LABEL: sdivneg2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a0, a0, a1
-; NOSFB-NEXT:    srai a0, a0, 1
-; NOSFB-NEXT:    neg a0, a0
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sdivneg2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    bgez a0, .LBB3_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a0, a0, 1
-; SFB-NEXT:  .LBB3_2:
-; SFB-NEXT:    srai a0, a0, 1
-; SFB-NEXT:    neg a0, a0
-; SFB-NEXT:    ret
+; CHECK-LABEL: sdivneg2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    srai a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
   %res = sdiv i64 %0, -2
   ret i64 %res
 }
 
 define signext i32 @srem2_32(i32 signext %0) {
-; NOSFB-LABEL: srem2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    subw a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: srem2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB4_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB4_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    subw a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: srem2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    subw a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i32 %0, 2
   ret i32 %res
 }
 
 define signext i32 @sremneg2_32(i32 signext %0) {
-; NOSFB-LABEL: sremneg2_32:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srliw a1, a0, 31
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    subw a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sremneg2_32:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB5_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB5_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    subw a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sremneg2_32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srliw a1, a0, 31
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    subw a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i32 %0, -2
   ret i32 %res
 }
 
 define i64 @srem2_64(i64 %0) {
-; NOSFB-LABEL: srem2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    sub a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: srem2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB6_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB6_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    sub a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: srem2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i64 %0, 2
   ret i64 %res
 }
 
 define i64 @sremneg2_64(i64 %0) {
-; NOSFB-LABEL: sremneg2_64:
-; NOSFB:       # %bb.0:
-; NOSFB-NEXT:    srli a1, a0, 63
-; NOSFB-NEXT:    add a1, a1, a0
-; NOSFB-NEXT:    andi a1, a1, -2
-; NOSFB-NEXT:    sub a0, a0, a1
-; NOSFB-NEXT:    ret
-;
-; SFB-LABEL: sremneg2_64:
-; SFB:       # %bb.0:
-; SFB-NEXT:    mv a1, a0
-; SFB-NEXT:    bgez a0, .LBB7_2
-; SFB-NEXT:  # %bb.1:
-; SFB-NEXT:    addi a1, a0, 1
-; SFB-NEXT:  .LBB7_2:
-; SFB-NEXT:    andi a1, a1, -2
-; SFB-NEXT:    sub a0, a0, a1
-; SFB-NEXT:    ret
+; CHECK-LABEL: sremneg2_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    srli a1, a0, 63
+; CHECK-NEXT:    add a1, a1, a0
+; CHECK-NEXT:    andi a1, a1, -2
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
   %res = srem i64 %0, -2
   ret i64 %res
 }

; NOSFB-NEXT: sraiw a0, a0, 1
; NOSFB-NEXT: ret
;
; SFB-LABEL: sdiv2_32:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On SiFive's cores with SFB (Short forward branch), bgez + addi only takes one cycle.

SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
if (Lg2 == 1) {
// If Divisor is 2, add 1 << (BitWidth -1) to it before shifting right.
Copy link
Collaborator

@topperc topperc Jul 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment isn't accurate. It's adding (N0 >> (BitWidth - 1)).

Similar to X86, if X's size is BitWidth, then X sdiv 2 can be expressed as
```
 X += X >> (BitWidth - 1)
 X = X >> 1
```
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@vfdff vfdff merged commit 1bafe77 into llvm:main Jul 12, 2024
7 checks passed
aaryanshukla pushed a commit to aaryanshukla/llvm-project that referenced this pull request Jul 14, 2024
Same as X86: if X's size is BitWidth, then X sdiv 2 can be
expressed as
```
  X += X >> (BitWidth - 1)
  X = X >> 1
```

Fix llvm#97884
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AArch64 llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[AArch64] Special optimization for sdiv 2
4 participants