-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold (icmp eq/ne (xor x, y), C1)
even if multiuse
#87275
Conversation
(icmp eq/ne (xor x, y), C1)
even if multiuse
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/87275.diff 5 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index db302d7e526844..1200f08cef78f1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3461,15 +3461,13 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
break;
}
case Instruction::Xor:
- if (BO->hasOneUse()) {
- if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
- // For the xor case, we can xor two constants together, eliminating
- // the explicit xor.
- return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
- } else if (C.isZero()) {
- // Replace ((xor A, B) != 0) with (A != B)
- return new ICmpInst(Pred, BOp0, BOp1);
- }
+ if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
+ } else if (C.isZero()) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(Pred, BOp0, BOp1);
}
break;
case Instruction::Or: {
diff --git a/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll b/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
index f5d5ef32c81e81..e8a78df6d5f756 100644
--- a/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
@@ -88,7 +88,7 @@ define i1 @cmpeq_xor_cst1_commuted(i32 %a, i32 %b) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 10
; CHECK-NEXT: ret i1 [[CMP]]
;
- %b2 = mul i32 %b, %b ; thwart complexity-based canonicalization
+ %b2 = mul i32 %b, %b ; thwart complexity-based canonicalization
%c = xor i32 %a, 10
%cmp = icmp eq i32 %b2, %c
ret i1 %cmp
@@ -145,3 +145,43 @@ entry:
%cmp = icmp ne <2 x i8> %xor, <i8 9, i8 79>
ret <2 x i1> %cmp
}
+
+declare void @use.i8(i8)
+define i1 @fold_xorC_eq0_multiuse(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_xorC_eq0_multiuse(
+; CHECK-NEXT: [[XX:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X]], [[Y]]
+; CHECK-NEXT: call void @use.i8(i8 [[XX]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xx = xor i8 %x, %y
+ %r = icmp eq i8 %xx, 0
+ call void @use.i8(i8 %xx)
+ ret i1 %r
+}
+
+define i1 @fold_xorC_eq1_multiuse_fail(i8 %x, i8 %y) {
+; CHECK-LABEL: @fold_xorC_eq1_multiuse_fail(
+; CHECK-NEXT: [[XX:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[XX]], 1
+; CHECK-NEXT: call void @use.i8(i8 [[XX]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xx = xor i8 %x, %y
+ %r = icmp eq i8 %xx, 1
+ call void @use.i8(i8 %xx)
+ ret i1 %r
+}
+
+define i1 @fold_xorC_neC_multiuse(i8 %x) {
+; CHECK-LABEL: @fold_xorC_neC_multiuse(
+; CHECK-NEXT: [[XX:%.*]] = xor i8 [[X:%.*]], 45
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 110
+; CHECK-NEXT: call void @use.i8(i8 [[XX]])
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %xx = xor i8 %x, 45
+ %r = icmp ne i8 %xx, 67
+ call void @use.i8(i8 %xx)
+ ret i1 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll
index 922845c1e7e2d8..dce8b6a3b282e6 100644
--- a/llvm/test/Transforms/InstCombine/icmp-or.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
declare void @use(i8)
@@ -434,7 +434,7 @@ define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
; CHECK-NEXT: [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OR]], 0
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[XOR]], 0
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[X1]], [[Y1]]
; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP]], [[CMP_1]]
; CHECK-NEXT: ret i1 [[OR1]]
;
@@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
; CHECK-NEXT: [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OR]], 0
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]]
; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP]], [[CMP_1]]
; CHECK-NEXT: ret i1 [[OR1]]
;
diff --git a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
index cd05022b0d35da..e5906e7a969730 100644
--- a/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
+++ b/llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
@@ -7,9 +7,9 @@
define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LHS:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[LHS]], [[RHS:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP1]], 5
; CHECK-NEXT: [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT: ret i1 [[SEL]]
;
@@ -23,9 +23,9 @@ define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: @test1_logical(
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10
-; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LHS:%.*]], 15
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[LHS]], [[RHS:%.*]]
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP1]], 5
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP2]]
; CHECK-NEXT: ret i1 [[SEL]]
;
@@ -40,7 +40,7 @@ define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) {
define zeroext i1 @test2(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], [[RHS:%.*]]
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 0
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LHS]], [[RHS]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], 32
; CHECK-NEXT: [[SEL:%.*]] = xor i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT: ret i1 [[SEL]]
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 278cabdff9ed3e..30bbc86b091494 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -3698,7 +3698,8 @@ exit:
define i32 @src_select_xxory_eq0_xorxy_y(i32 %x, i32 %y) {
; CHECK-LABEL: @src_select_xxory_eq0_xorxy_y(
; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 0, i32 [[Y]]
+; CHECK-NEXT: [[XOR:%.*]] = select i1 [[XOR0]], i32 [[X]], i32 0
+; CHECK-NEXT: [[COND:%.*]] = xor i32 [[XOR]], [[Y]]
; CHECK-NEXT: ret i32 [[COND]]
;
%xor = xor i32 %x, %y
|
@@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) { | |||
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]] | |||
; CHECK-NEXT: [[OR:%.*]] = or i64 [[XOR]], [[XOR1]] | |||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OR]], 0 | |||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0 | |||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We end up simplifying this entire thing on the next iteration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?
We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.
In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?
I'm 100% sure what is unlocked.
It could also be we are hitting some analysis pattern in the new form.
We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.
In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.
Kk
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah its the creation of new one use. I added a comment explaining.
Think handling in handleUseCountDecrement()
requires pretty deep adding. Doubt its worth it. Esp when we run InstCombine multiple times in the real pipeline.
@@ -3698,7 +3698,8 @@ exit: | |||
define i32 @src_select_xxory_eq0_xorxy_y(i32 %x, i32 %y) { | |||
; CHECK-LABEL: @src_select_xxory_eq0_xorxy_y( | |||
; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] | |||
; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 0, i32 [[Y]] | |||
; CHECK-NEXT: [[XOR:%.*]] = select i1 [[XOR0]], i32 [[X]], i32 0 | |||
; CHECK-NEXT: [[COND:%.*]] = xor i32 [[XOR]], [[Y]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be cleaned up by: #73362
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nevermind, had to cleaned up with bespoke code.
32a4412
to
a0cf8df
Compare
a0cf8df
to
41ce9ab
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please move the foldSelectValueEquivalence() changes into a separate PR (to land first)?
// would lead to an infinite replacement cycle. | ||
// If we will be able to evaluate f(Y) to a constant, we can allow undef, | ||
// otherwise Y cannot be undef as we might pick different values for undef | ||
// in the icmp and in f(Y). Additionally |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cut off sentence.
Done, see: #88298 |
ping |
NB: this depends on #88298 |
Two folds unlocked: `(icmp eq/ne (xor x, C0), C1)` -> `(icmp eq/ne x, C2)` `(icmp eq/ne (xor x, y), 0)` -> `(icmp eq/ne x, y)` This fixes regressions assosiated with llvm#87180
41ce9ab
to
7e70922
Compare
Rebased, should be all green w/ the selectequivilence patch in. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) { | |||
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]] | |||
; CHECK-NEXT: [[OR:%.*]] = or i64 [[XOR]], [[XOR1]] | |||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OR]], 0 | |||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0 | |||
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?
We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.
In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Two folds unlocked: `(icmp eq/ne (xor x, C0), C1)` -> `(icmp eq/ne x, C2)` `(icmp eq/ne (xor x, y), 0)` -> `(icmp eq/ne x, y)` This fixes regressions assosiated with llvm#87180 Closes llvm#87275 Signed-off-by: Hafidz Muzakky <ais.muzakky@gmail.com>
(icmp eq/ne (xor x, C0), C1)
; NFC(icmp eq/ne (xor x, y), C1)
even if multiuseTwo folds unlocked:
(icmp eq/ne (xor x, C0), C1)
->(icmp eq/ne x, C2)
(icmp eq/ne (xor x, y), 0)
->(icmp eq/ne x, y)
This fixes regressions assosiated with #87180