Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify (a % b) lt/ge (b-1) into (a % b) eq/ne (b-1) #72504

Merged
merged 2 commits into from
Jan 16, 2024

Conversation

elhewaty
Copy link
Member

@elhewaty elhewaty commented Nov 16, 2023

@llvmbot
Copy link
Member

llvmbot commented Nov 16, 2023

@llvm/pr-subscribers-llvm-transforms

Author: None (elhewaty)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/72504.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+29)
  • (modified) llvm/test/Transforms/InstCombine/icmp.ll (+133)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9bc84c7dd6e1539..96ca914388a9df3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6837,6 +6837,35 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
     Changed = true;
   }
 
+  {
+    Value *X;
+    const APInt *C, *CC;
+    ICmpInst::Predicate Pred = I.getPredicate();
+    if (match(Op1, m_SRem(m_Value(X), m_Power2(C))) &&
+        match(Op0, m_APInt(CC))) {
+      std::swap(Op0, Op1);
+      Pred = I.getSwappedPredicate();
+    }
+
+    if (match(Op0, m_SRem(m_Value(X), m_Power2(C))) &&
+        match(Op1, m_APInt(CC)) && *CC == *C - 1) {
+      int BW = C->getBitWidth();
+      int Log2 = C->exactLogBase2();
+      long AndWith = -(1ll << (BW - 1)) + (1ll << Log2) - 1;
+      auto *And = Builder.CreateAnd(X, AndWith);
+      // icmp slt (X % C), (C - 1)
+      //   --> icmp ne (X & -(pow(2, BW - 1) - pow(2, log(C)) + 1)), (C - 1)
+      if (Pred == ICmpInst::ICMP_SLT)
+        return new ICmpInst(ICmpInst::ICMP_NE, And,
+                            ConstantInt::get(And->getType(), *CC));
+      // icmp sge (X % C), (C - 1)
+      //   --> icmp eq (X & -(pow(2, BW - 1) - pow(2, log(C)) + 1)), (C - 1)
+      if (Pred == ICmpInst::ICMP_SGE)
+        return new ICmpInst(ICmpInst::ICMP_EQ, And,
+                            ConstantInt::get(And->getType(), *CC));
+    }
+  }
+
   if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
     return replaceInstUsesWith(I, V);
 
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 78ac730cf026ed9..d7ef8e56f11f858 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -10,6 +10,139 @@ declare void @use_i8(i8)
 declare void @use_i32(i32)
 declare void @use_i64(i64)
 
+; tests for (x % y) >=/ < (y - 1)
+define i1 @srem_sge_test1(i64 %x) {
+; CHECK-LABEL: @srem_sge_test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], -9223372028264841217
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], 8589934591
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i64 %x, 8589934592
+  %cmp = icmp sge i64 %rem, 8589934591
+  ret i1 %cmp
+}
+
+define i1 @srem_slt_test1(i64 %x) {
+; CHECK-LABEL: @srem_slt_test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], -9223372028264841217
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i64 [[TMP1]], 8589934591
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i64 %x, 8589934592
+  %cmp = icmp slt i64 %rem, 8589934591
+  ret i1 %cmp
+}
+
+define i1 @srem_sge_test2(i32 %x) {
+; CHECK-LABEL: @srem_sge_test2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2147482625
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1023
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i32 %x, 1024
+  %cmp = icmp sge i32 %rem, 1023
+  ret i1 %cmp
+}
+
+define i1 @srem_slt_test2(i32 %x) {
+; CHECK-LABEL: @srem_slt_test2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483393
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP1]], 255
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i32 %x, 256
+  %cmp = icmp slt i32 %rem, 255
+  ret i1 %cmp
+}
+
+define i1 @srem_sge_test3(i16 %x) {
+; CHECK-LABEL: @srem_sge_test3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], -24577
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[TMP1]], 8191
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i16 %x, 8192
+  %cmp = icmp sge i16 %rem, 8191
+  ret i1 %cmp
+}
+
+define i1 @srem_slt_test3(i16 %x) {
+; CHECK-LABEL: @srem_slt_test3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], -24577
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i16 [[TMP1]], 8191
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i16 %x, 8192
+  %cmp = icmp slt i16 %rem, 8191
+  ret i1 %cmp
+}
+
+define i1 @srem_sge_test4(i8 %x) {
+; CHECK-LABEL: @srem_sge_test4(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], -65
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP1]], 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i8 %x, 64
+  %cmp = icmp sge i8 %rem, 63
+  ret i1 %cmp
+}
+
+define i1 @srem_slt_test4(i8 %x) {
+; CHECK-LABEL: @srem_slt_test4(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], -65
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP1]], 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i8 %x, 64
+  %cmp = icmp slt i8 %rem, 63
+  ret i1 %cmp
+}
+
+; tests for (y - 1) >/<= (x % y)
+define i1 @srem_sgt_test1(i32 %x) {
+; CHECK-LABEL: @srem_sgt_test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -2146435073
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP1]], 1048575
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i32 %x, 1048576
+  %cmp = icmp sgt i32 1048575, %rem
+  ret i1 %cmp
+}
+
+define i1 @srem_sle_test1(i16 %x) {
+; CHECK-LABEL: @srem_sle_test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], -24577
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[TMP1]], 8191
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i16 %x, 8192
+  %cmp = icmp sle i16 8191, %rem
+  ret i1 %cmp
+}
+
+; negative tests
+define i1 @srem_sgt_test(i32 %x) {
+; CHECK-LABEL: @srem_sgt_test(
+; CHECK-NEXT:    ret i1 false
+;
+  %rem = srem i32 %x, 32
+  %cmp = icmp sgt i32 %rem, 31
+  ret i1 %cmp
+}
+
+define i1 @srem_another_negative_test(i32 %x) {
+; CHECK-LABEL: @srem_another_negative_test(
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[REM]], 5
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %rem = srem i32 %x, 8
+  %cmp = icmp sge i32 %rem, 6
+  ret i1 %cmp
+}
+
 define i32 @test1(i32 %X) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr i32 [[X:%.*]], 31

@elhewaty
Copy link
Member Author

@goldsteinn @arsenm @dtcxzyw

@nikic nikic requested review from dtcxzyw and goldsteinn November 16, 2023 11:58
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please provide alive2 links with symbolic constants, not specific ones.

As far as I can tell, this transform doesn't actually require a power of two? We only need to be careful about the sign. https://alive2.llvm.org/ce/z/AJq3Ch

@nikic
Copy link
Contributor

nikic commented Nov 16, 2023

As far as I can tell, this transform doesn't actually require a power of two? We only need to be careful about the sign. https://alive2.llvm.org/ce/z/AJq3Ch

Just to be clear, I'm viewing this in terms of folding an inequality predicate into an equality predicate. Once we have an equality comparison, it will be converted into a mask check for powers of two by an existing transform.

@elhewaty
Copy link
Member Author

As far as I can tell, this transform doesn't actually require a power of two? We only need to be careful about the sign. https://alive2.llvm.org/ce/z/AJq3Ch

Can you explain further please, I think I don't understand the code very well.

@jcranmer-intel
Copy link
Contributor

jcranmer-intel commented Nov 16, 2023

Can you explain further please, I think I don't understand the code very well.

The srem result of x % n is between -(abs(n) - 1) and (abs(n) - 1), inclusive. This means that (x % n) >= (n - 1) can be converted into (x % n) == (n - 1) and (x % n) < (n - 1) can be converted into (x % n) != (n - 1), respectively. We already have transformations that convert the (x % n) ==/!= (n - 1) to the equivalent and expressions when n is a power of 2.

@elhewaty
Copy link
Member Author

elhewaty commented Nov 17, 2023

@dtcxzyw Does llvm.assume require special handling?

{
    Value *X;
    const APInt *C, *CC;
    ICmpInst::Predicate Pred = I.getPredicate();
    if ((Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_SLT) &&
        match(Op0, m_OneUse(m_SRem(m_Value(X), m_APInt(C)))) &&
        match(Op1, m_OneUse(m_Add(m_SpecificInt(*C), m_APInt(CC)))) &&
        !(C->isNegative()) && *CC == -1) {
      // icmp slt (X % C), (C - 1) --> icmp ne (X % C), (C - 1)
      auto *NewCmp = Builder.CreateICmpNE(Op0, Op1);        
      // icmp sge (X % C), (C - 1) --> icmp eq (X % C), (C - 1)
      if (Pred == ICmpInst::ICMP_SGE)
        NewCmp = Builder.CreateICmpEQ(Op0, Op1);
      return replaceInstUsesWith(I, NewCmp);
    }
  }

This code doesn't change the tests.

@dtcxzyw
Copy link
Member

dtcxzyw commented Nov 17, 2023

@dtcxzyw Does llvm.assume require special handling?

You need to check the sign of C. @llvm.assume is used to add preconditions for alive2.

{
    Value *X;
    const APInt *C;
    ICmpInst::Predicate Pred = I.getPredicate();
    if ((Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_SLT) &&
        match(Op0, m_OneUse(m_SRem(m_Value(X), m_APInt(C)))) &&
        match(Op1, m_OneUse(m_Add(m_SpecificInt(*C), m_AllOnes()))) &&
        !(C->isNegative())) {
      // icmp slt (X % C), (C - 1) --> icmp ne (X % C), (C - 1)
      auto *NewCmp = Builder.CreateICmpNE(Op0, Op1);

nit: Don't create instructions that may be unused.

      // icmp sge (X % C), (C - 1) --> icmp eq (X % C), (C - 1)
      if (Pred == ICmpInst::ICMP_SGE)
        NewCmp = Builder.CreateICmpEQ(Op0, Op1);
      return replaceInstUsesWith(I, NewCmp);
    }
  }

@elhewaty
Copy link
Member Author

I check by !(C->isNegative())

@elhewaty
Copy link
Member Author

@dtcxzyw why doesn't the code optimize the following test?

define i1 @srem_slt_test1(i64 %x, i64 %C) {
; CHECK-LABEL: @srem_slt_test1(
; CHECK-NEXT:    [[PRECOND:%.*]] = icmp sgt i64 [[C:%.*]], -1
; CHECK-NEXT:    call void @llvm.assume(i1 [[PRECOND]])
; CHECK-NEXT:    [[CMINUS1:%.*]] = add nsw i64 [[C]], -1
; CHECK-NEXT:    [[Y:%.*]] = srem i64 [[X:%.*]], [[C]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[Y]], [[CMINUS1]]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %precond = icmp sge i64 %C, 0
  call void @llvm.assume(i1 %precond)
  %Cminus1 = add i64 %C, -1
  %y = srem i64 %x, %C
  %cmp = icmp slt i64 %y, %Cminus1
  ret i1 %cmp
}

@dtcxzyw
Copy link
Member

dtcxzyw commented Nov 18, 2023

@dtcxzyw why doesn't the code optimize the following test?

define i1 @srem_slt_test1(i64 %x, i64 %C) {
; CHECK-LABEL: @srem_slt_test1(
; CHECK-NEXT:    [[PRECOND:%.*]] = icmp sgt i64 [[C:%.*]], -1
; CHECK-NEXT:    call void @llvm.assume(i1 [[PRECOND]])
; CHECK-NEXT:    [[CMINUS1:%.*]] = add nsw i64 [[C]], -1
; CHECK-NEXT:    [[Y:%.*]] = srem i64 [[X:%.*]], [[C]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[Y]], [[CMINUS1]]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %precond = icmp sge i64 %C, 0
  call void @llvm.assume(i1 %precond)
  %Cminus1 = add i64 %C, -1
  %y = srem i64 %x, %C
  %cmp = icmp slt i64 %y, %Cminus1
  ret i1 %cmp
}

m_APInt only matches a constant integer scalar or a constant splat vector. If you want to handle all non-negative variables, please use m_Value and isKnownNonNegative. isKnownNonNegative will use the information provided by @llvm.assume.

For example:

if ((Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_SLT) &&
        match(Op0, m_OneUse(m_SRem(m_Value(X), m_Value(Y)))) &&
        match(Op1, m_OneUse(m_c_Add(m_Deferred(Y), m_AllOnes()))) &&
        isKnownNonNegative(Y, DL, 0, &AC, &I, &DT))

But I think handling constants is enough:

if ((Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_SLT) &&
        match(Op0, m_OneUse(m_SRem(m_Value(X), m_APInt(*C)))) &&
        match(Op1, m_SpecificInt(*C - 1)) &&
        !C->isNegative())

@elhewaty
Copy link
Member Author

The first version of the pull handled this: https://godbolt.org/z/sTrsj1aW4
gcc uses and with -121, can I create another pull, that handles this?

@elhewaty elhewaty requested a review from dtcxzyw November 19, 2023 09:24
@dtcxzyw
Copy link
Member

dtcxzyw commented Nov 19, 2023

The first version of the pull handled this: https://godbolt.org/z/sTrsj1aW4 gcc uses and with -121, can I create another pull, that handles this?

Sure!

@dtcxzyw dtcxzyw changed the title Fold (a % b) lt/ge (b-1) where b is a power of 2 Simplify (a % b) lt/ge (b-1) into (a % b) eq/ne (b-1) Nov 19, 2023
@dtcxzyw
Copy link
Member

dtcxzyw commented Nov 19, 2023

Please also update the PR description and the alive2 link.

@nikic
Copy link
Contributor

nikic commented Nov 20, 2023

I think the most principled way to handle this would be to generalize the existing handling for converting inequality to equality comparisons which currently only uses dominating conditions to also use computeConstantRange(). I've sketched this out here: 541ec50

This does cause quite a few regressions that would have to be fixed first.

It also regresses compile-time: http://llvm-compile-time-tracker.com/compare.php?from=9ca9c2cf7e05a0fe44a8a688d0c322d5229511d9&to=541ec50dd7907e0945f30fd3778d599425a4e665&stat=instructions%3Au

Possibly the compile-time regression can be avoided if we support ConstantRange in WithCache, because we already perform this computeConstantRange() call in InstSimplify.

In the meantime adding srem specific handling sounds okay, but I'm not willing to accept support for non-constant operands.

@elhewaty
Copy link
Member Author

elhewaty commented Dec 4, 2023

@dtcxzyw @nikic, sorry for my late reply. From my understanding, we should add this optimization but handle only constant integer scalars. I have a question here: how can we make sure that the tests work fine, given that match(Value, m_APInt()) doesn't work with @llvm.assume()? Should we add tests with constant scalars only (use integer constants, and splat vectors)?
for example:

%Cminus1 = add i32 1074977277, -1
  %y = srem i32 %x, 1074977277
  %cmp = icmp slt i32 %y, %Cminus1
  ret i1 %cmp

@dtcxzyw
Copy link
Member

dtcxzyw commented Dec 6, 2023

Should we add tests with constant scalars only (use integer constants, and splat vectors)? for example:

Yeah, you should use @llvm.assume in Alive2 proof and use constants in tests.

@elhewaty elhewaty force-pushed the instcombine-cmp branch 2 times, most recently from b3550bf to 7cf0d5c Compare January 13, 2024 01:58
@elhewaty elhewaty requested a review from nikic January 13, 2024 01:59
Copy link

github-actions bot commented Jan 13, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@elhewaty elhewaty force-pushed the instcombine-cmp branch 2 times, most recently from 12a27f7 to 03f785d Compare January 13, 2024 02:19
(Pred == ICmpInst::ICMP_SGT && C == *C1 - 2))) ||
(match(SRem->getOperand(1), m_Negative(C1)) &&
((Pred == ICmpInst::ICMP_SGT && C == *C1 + 1) ||
(Pred == ICmpInst::ICMP_SLT && C == *C1 + 2)))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry to bring this up only now, but playing around with this a bit, I don't think the way negative numbers are handled here makes sense. The thing is that srem %x, -C will be canonicalized to srem %x, C (the sign of the srem result is determined by the first operand, not the second one!) so we will never actually hit the m_Negative() branch.

We should still keep the m_NonNegative(C1) due to worklist order considerations, but the m_Negative(C1) branch can be dropped.

Instead, we'd want to add more cases to the positive case, such as (srem %x, C) sgt (-C + 1). This would be one of the new cases: https://alive2.llvm.org/ce/z/MeiohD Same for slt and +2.

define i1 @srem_sgt_test(i16 %x) {
; CHECK-LABEL: @srem_sgt_test(
; CHECK-NEXT: [[Y:%.*]] = srem i16 [[X:%.*]], 2259
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[Y]], -2258
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can see it in this test. The sign of the RHS of the srem has been flipped, and the fold does not apply, even though it was supposed to.

@elhewaty elhewaty requested a review from nikic January 13, 2024 12:10
@elhewaty
Copy link
Member Author

@nikic ping.

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementation looks good, just a note on the tests.

llvm/test/Transforms/InstCombine/icmp.ll Show resolved Hide resolved
%y = urem i32 %x, 15344
%cmp = icmp ult i32 %y, 15343
ret i1 %cmp
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to see some negative tests with off by one constants. For example here ult 15342 should not transform.

@elhewaty elhewaty requested a review from nikic January 15, 2024 22:42
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@nikic nikic merged commit 01f4d40 into llvm:main Jan 16, 2024
3 of 4 checks passed
@dyung
Copy link
Collaborator

dyung commented Jan 16, 2024

@elhewaty your change seems to be causing some test failures, can you take a look?

https://lab.llvm.org/buildbot/#/builders/139/builds/57175

nikic added a commit that referenced this pull request Jan 16, 2024
@nikic
Copy link
Contributor

nikic commented Jan 16, 2024

Reverted in de8f782. It looks like not all tests were updated.

(I did check the pre-commit results, but they failed due to an unrelated Flang issue and for some reason we do not run LLVM tests if Flang tests fail -- WTF?)

@elhewaty
Copy link
Member Author

@nikic what should I do then?

@dtcxzyw
Copy link
Member

dtcxzyw commented Jan 16, 2024

@nikic what should I do then?

You should fix it, and open another PR to reland this patch.

@elhewaty
Copy link
Member Author

@dtcxzyw should I open another PR with only the tests updated or with the whole patch?

@elhewaty
Copy link
Member Author

and for the following change

diff --git a/llvm/test/Transforms/InstCombine/modulo.ll b/llvm/test/Transforms/InstCombine/modulo.ll
index 2988c524faed..76e16651f4e3 100644
--- a/llvm/test/Transforms/InstCombine/modulo.ll
+++ b/llvm/test/Transforms/InstCombine/modulo.ll
@@ -4,7 +4,10 @@
 ; PR21929
 define i32 @modulo2(i32 %x) {
 ; CHECK-LABEL: @modulo2(
-; CHECK-NEXT:    [[RET_I:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[REM_I:%.*]] = srem i32 [[X:%.*]], 2
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[REM_I]], -1
+; CHECK-NEXT:    [[ADD_I:%.*]] = select i1 [[CMP_I]], i32 2, i32 0
+; CHECK-NEXT:    [[RET_I:%.*]] = add nsw i32 [[ADD_I]], [[REM_I]]
 ; CHECK-NEXT:    ret i32 [[RET_I]]
 ;
   %rem.i = srem i32 %x, 2
@@ -16,7 +19,10 @@ define i32 @modulo2(i32 %x) {
 
 define <2 x i32> @modulo2_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @modulo2_vec(
-; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <2 x i32> [[REM_I]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> <i32 2, i32 2>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET_I]]
 ;
   %rem.i = srem <2 x i32> %x, <i32 2, i32 2>

The old optimized output is a single `and` instruction, but the patch replaces this instruction
with 3 instructions.

@nikic
Copy link
Contributor

nikic commented Jan 16, 2024

@elhewaty It looks like we have to implement this fold first: https://alive2.llvm.org/ce/z/pqfbT7 It looks like this already happens when the comparison is with a positive number, so wherever that happens needs support for negative numbers as well.

@elhewaty
Copy link
Member Author

@nikic, we need to find a pattern to handle negative numbers first, right?

justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

clang is suboptimal for (a % b) lt/ge (b-1) where b is a power of 2
7 participants