-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify (a % b) lt/ge (b-1)
into (a % b) eq/ne (b-1)
#72504
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (elhewaty) Changes
Full diff: https://github.com/llvm/llvm-project/pull/72504.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9bc84c7dd6e1539..96ca914388a9df3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6837,6 +6837,35 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
Changed = true;
}
+ {
+ Value *X;
+ const APInt *C, *CC;
+ ICmpInst::Predicate Pred = I.getPredicate();
+ if (match(Op1, m_SRem(m_Value(X), m_Power2(C))) &&
+ match(Op0, m_APInt(CC))) {
+ std::swap(Op0, Op1);
+ Pred = I.getSwappedPredicate();
+ }
+
+ if (match(Op0, m_SRem(m_Value(X), m_Power2(C))) &&
+ match(Op1, m_APInt(CC)) && *CC == *C - 1) {
+ int BW = C->getBitWidth();
+ int Log2 = C->exactLogBase2();
+ long AndWith = -(1ll << (BW - 1)) + (1ll << Log2) - 1;
+ auto *And = Builder.CreateAnd(X, AndWith);
+ // icmp sge (X % C), (C - 1)
+ // --> icmp eq (X & -(pow(2, BW - 1) - pow(2, log(C)) + 1)), (C - 1)
+ if (Pred == ICmpInst::ICMP_SLT)
+ return new ICmpInst(ICmpInst::ICMP_NE, And,
+ ConstantInt::get(And->getType(), *CC));
+ // icmp sge (X % C), (C - 1)
+ // --> icmp eq (X & -(pow(2, BW - 1) - pow(2, log(C)) + 1)), (C - 1)
+ if (Pred == ICmpInst::ICMP_SGE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, And,
+ ConstantInt::get(And->getType(), *CC));
+ }
+ }
+
if (Value *V = simplifyICmpInst(I.getPredicate(), Op0, Op1, Q))
return replaceInstUsesWith(I, V);
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 78ac730cf026ed9..d7ef8e56f11f858 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -10,6 +10,139 @@ declare void @use_i8(i8)
declare void @use_i32(i32)
declare void @use_i64(i64)
+; tests for (x % y) >=/ < (y - 1)
+define i1 @srem_sge_test1(i64 %x) {
+; CHECK-LABEL: @srem_sge_test1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], -9223372028264841217
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], 8589934591
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i64 %x, 8589934592
+ %cmp = icmp sge i64 %rem, 8589934591
+ ret i1 %cmp
+}
+
+define i1 @srem_slt_test1(i64 %x) {
+; CHECK-LABEL: @srem_slt_test1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], -9223372028264841217
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[TMP1]], 8589934591
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i64 %x, 8589934592
+ %cmp = icmp slt i64 %rem, 8589934591
+ ret i1 %cmp
+}
+
+define i1 @srem_sge_test2(i32 %x) {
+; CHECK-LABEL: @srem_sge_test2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2147482625
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1023
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i32 %x, 1024
+ %cmp = icmp sge i32 %rem, 1023
+ ret i1 %cmp
+}
+
+define i1 @srem_slt_test2(i32 %x) {
+; CHECK-LABEL: @srem_slt_test2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2147483393
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 255
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i32 %x, 256
+ %cmp = icmp slt i32 %rem, 255
+ ret i1 %cmp
+}
+
+define i1 @srem_sge_test3(i16 %x) {
+; CHECK-LABEL: @srem_sge_test3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], -24577
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[TMP1]], 8191
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i16 %x, 8192
+ %cmp = icmp sge i16 %rem, 8191
+ ret i1 %cmp
+}
+
+define i1 @srem_slt_test3(i16 %x) {
+; CHECK-LABEL: @srem_slt_test3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], -24577
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i16 [[TMP1]], 8191
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i16 %x, 8192
+ %cmp = icmp slt i16 %rem, 8191
+ ret i1 %cmp
+}
+
+define i1 @srem_sge_test4(i8 %x) {
+; CHECK-LABEL: @srem_sge_test4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -65
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP1]], 63
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i8 %x, 64
+ %cmp = icmp sge i8 %rem, 63
+ ret i1 %cmp
+}
+
+define i1 @srem_slt_test4(i8 %x) {
+; CHECK-LABEL: @srem_slt_test4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -65
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP1]], 63
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i8 %x, 64
+ %cmp = icmp slt i8 %rem, 63
+ ret i1 %cmp
+}
+
+; tests for (y - 1) >/<= (x % y)
+define i1 @srem_sgt_test1(i32 %x) {
+; CHECK-LABEL: @srem_sgt_test1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2146435073
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 1048575
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i32 %x, 1048576
+ %cmp = icmp sgt i32 1048575, %rem
+ ret i1 %cmp
+}
+
+define i1 @srem_sle_test1(i16 %x) {
+; CHECK-LABEL: @srem_sle_test1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], -24577
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[TMP1]], 8191
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i16 %x, 8192
+ %cmp = icmp sle i16 8191, %rem
+ ret i1 %cmp
+}
+
+; negative tests
+define i1 @srem_sgt_test(i32 %x) {
+; CHECK-LABEL: @srem_sgt_test(
+; CHECK-NEXT: ret i1 false
+;
+ %rem = srem i32 %x, 32
+ %cmp = icmp sgt i32 %rem, 31
+ ret i1 %cmp
+}
+
+define i1 @srem_another_negative_test(i32 %x) {
+; CHECK-LABEL: @srem_another_negative_test(
+; CHECK-NEXT: [[REM:%.*]] = srem i32 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[REM]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %rem = srem i32 %x, 8
+ %cmp = icmp sge i32 %rem, 6
+ ret i1 %cmp
+}
+
define i32 @test1(i32 %X) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 [[X:%.*]], 31
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please provide alive2 links with symbolic constants, not specific ones.
As far as I can tell, this transform doesn't actually require a power of two? We only need to be careful about the sign. https://alive2.llvm.org/ce/z/AJq3Ch
Just to be clear, I'm viewing this in terms of folding an inequality predicate into an equality predicate. Once we have an equality comparison, it will be converted into a mask check for powers of two by an existing transform. |
Can you explain further please, I think I don't understand the code very well. |
The srem result of |
@dtcxzyw Does llvm.assume require special handling?
This code doesn't change the tests. |
You need to check the sign of
nit: Don't create instructions that may be unused.
|
I check by |
@dtcxzyw why doesn't the code optimize the following test?
|
For example:
But I think handling constants is enough:
|
fb9a3d6
to
92f1a7c
Compare
92f1a7c
to
a934703
Compare
The first version of the pull handled this: https://godbolt.org/z/sTrsj1aW4 |
Sure! |
(a % b) lt/ge (b-1)
into (a % b) eq/ne (b-1)
Please also update the PR description and the alive2 link. |
I think the most principled way to handle this would be to generalize the existing handling for converting inequality to equality comparisons which currently only uses dominating conditions to also use computeConstantRange(). I've sketched this out here: 541ec50 This does cause quite a few regressions that would have to be fixed first. It also regresses compile-time: http://llvm-compile-time-tracker.com/compare.php?from=9ca9c2cf7e05a0fe44a8a688d0c322d5229511d9&to=541ec50dd7907e0945f30fd3778d599425a4e665&stat=instructions%3Au Possibly the compile-time regression can be avoided if we support ConstantRange in WithCache, because we already perform this computeConstantRange() call in InstSimplify. In the meantime adding srem specific handling sounds okay, but I'm not willing to accept support for non-constant operands. |
@dtcxzyw @nikic, sorry for my late reply. from my understanding, we should add this optimization, but handle only constants integer scalars, I have a question here how can we make sure that the tests work fine as
|
Yeah, you should use |
b3550bf
to
7cf0d5c
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
12a27f7
to
03f785d
Compare
(Pred == ICmpInst::ICMP_SGT && C == *C1 - 2))) || | ||
(match(SRem->getOperand(1), m_Negative(C1)) && | ||
((Pred == ICmpInst::ICMP_SGT && C == *C1 + 1) || | ||
(Pred == ICmpInst::ICMP_SLT && C == *C1 + 2)))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry to bring this up only now, but playing around with this a bit, I don't think the way negative numbers are handled here makes sense. The thing is that srem %x, -C
will be canonicalized to srem %x, C
(the sign of the srem result is determined by the first operand, not the second one!) so we will never actually hit the m_Negative()
branch.
We should still keep the m_NonNegative(C1)
due to worklist order considerations, but the m_Negative(C1)
branch can be dropped.
Instead, we'd want to add more cases to the positive case, such as (srem %x, C) sgt (-C + 1)
. This would be one of the new cases: https://alive2.llvm.org/ce/z/MeiohD Same for slt and +2.
define i1 @srem_sgt_test(i16 %x) { | ||
; CHECK-LABEL: @srem_sgt_test( | ||
; CHECK-NEXT: [[Y:%.*]] = srem i16 [[X:%.*]], 2259 | ||
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[Y]], -2258 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can see it in this test. The sign of the RHS of the srem has been flipped, and the fold does not apply, even though it was supposed to.
03f785d
to
fd999f5
Compare
@nikic ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implementation looks good, just a note on the tests.
%y = urem i32 %x, 15344 | ||
%cmp = icmp ult i32 %y, 15343 | ||
ret i1 %cmp | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like to see some negative tests with off by one constants. For example here ult 15342
should not transform.
fd999f5
to
2820199
Compare
2820199
to
ab83673
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@elhewaty your change seems to be causing some test failures, can you take a look? |
Reverted in de8f782. It looks like not all tests were updated. (I did check the pre-commit results, but they failed due to an unrelated Flang issue and for some reason we do not run LLVM tests if Flang tests fail -- WTF?) |
@nikic what should I do then? |
You should fix it, and open another PR to reland this patch. |
@dtcxzyw should I open another PR with only the tests updated or with the whole patch? |
and for the following change
the old optimization is one instruction |
@elhewaty It looks like we have to implement this fold first: https://alive2.llvm.org/ce/z/pqfbT7 It looks like this already happens when the comparison is with a positive number, so wherever that happens needs support for negative numbers as well. |
@nikic, we need to find a pattern to handle negative numbers first, right? |
Alive2: https://alive2.llvm.org/ce/z/i7zYtE
Fixes: clang is suboptimal for
(a % b) lt/ge (b-1)
where b is a power of 2 #71280