Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Fold (icmp eq/ne (xor x, y), C1) even if multiuse #87275

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3502,15 +3502,13 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
break;
}
case Instruction::Xor:
if (BO->hasOneUse()) {
if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
// For the xor case, we can xor two constants together, eliminating
// the explicit xor.
return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
} else if (C.isZero()) {
// Replace ((xor A, B) != 0) with (A != B)
return new ICmpInst(Pred, BOp0, BOp1);
}
if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
// For the xor case, we can xor two constants together, eliminating
// the explicit xor.
return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
} else if (C.isZero()) {
// Replace ((xor A, B) != 0) with (A != B)
return new ICmpInst(Pred, BOp0, BOp1);
}
break;
case Instruction::Or: {
Expand Down
42 changes: 41 additions & 1 deletion llvm/test/Transforms/InstCombine/icmp-equality-xor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ define i1 @cmpeq_xor_cst1_commuted(i32 %a, i32 %b) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 10
; CHECK-NEXT: ret i1 [[CMP]]
;
%b2 = mul i32 %b, %b ; thwart complexity-based canonicalization
%b2 = mul i32 %b, %b ; thwart complexity-based canonicalization
%c = xor i32 %a, 10
%cmp = icmp eq i32 %b2, %c
ret i1 %cmp
Expand Down Expand Up @@ -145,3 +145,43 @@ entry:
%cmp = icmp ne <2 x i8> %xor, <i8 9, i8 79>
ret <2 x i1> %cmp
}

declare void @use.i8(i8)
; Multi-use case: the xor result is also passed to @use.i8, yet the compare
; against zero is still expected to be rewritten as a direct compare of the
; xor operands (x == y). The xor itself remains for the call.
define i1 @fold_xorC_eq0_multiuse(i8 %x, i8 %y) {
; CHECK-LABEL: @fold_xorC_eq0_multiuse(
; CHECK-NEXT: [[XX:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X]], [[Y]]
; CHECK-NEXT: call void @use.i8(i8 [[XX]])
; CHECK-NEXT: ret i1 [[R]]
;
%xx = xor i8 %x, %y
%r = icmp eq i8 %xx, 0
call void @use.i8(i8 %xx)
ret i1 %r
}

; Negative test: both xor operands are non-constant and the compare constant
; is nonzero (1), so the CHECK lines expect the icmp to keep using the xor
; result unchanged.
define i1 @fold_xorC_eq1_multiuse_fail(i8 %x, i8 %y) {
; CHECK-LABEL: @fold_xorC_eq1_multiuse_fail(
; CHECK-NEXT: [[XX:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[XX]], 1
; CHECK-NEXT: call void @use.i8(i8 [[XX]])
; CHECK-NEXT: ret i1 [[R]]
;
%xx = xor i8 %x, %y
%r = icmp eq i8 %xx, 1
call void @use.i8(i8 %xx)
ret i1 %r
}

; Multi-use xor with a constant operand: (xor x, 45) != 67 is expected to
; become x != 110, since 45 ^ 67 == 110. The xor remains because its result
; is also passed to @use.i8.
define i1 @fold_xorC_neC_multiuse(i8 %x) {
; CHECK-LABEL: @fold_xorC_neC_multiuse(
; CHECK-NEXT: [[XX:%.*]] = xor i8 [[X:%.*]], 45
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 110
; CHECK-NEXT: call void @use.i8(i8 [[XX]])
; CHECK-NEXT: ret i1 [[R]]
;
%xx = xor i8 %x, 45
%r = icmp ne i8 %xx, 67
call void @use.i8(i8 %xx)
ret i1 %r
}
12 changes: 6 additions & 6 deletions llvm/test/Transforms/InstCombine/icmp-or.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s

declare void @use(i8)

Expand Down Expand Up @@ -434,7 +434,7 @@ define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
; CHECK-NEXT: [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OR]], 0
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[XOR]], 0
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[X1]], [[Y1]]
; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP]], [[CMP_1]]
; CHECK-NEXT: ret i1 [[OR1]]
;
Expand All @@ -455,7 +455,7 @@ define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
; CHECK-NEXT: [[OR:%.*]] = or i64 [[XOR]], [[XOR1]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OR]], 0
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[XOR1]], 0
; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[X2]], [[Y2]]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We end up simplifying this entire thing on the next iteration.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?

We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.

In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand right that this is because this drops the use count of xor1, which enables an optimization of cmp?

I'm not 100% sure what is unlocked.
It could also be that we are hitting some analysis pattern in the new form.

We could probably handle this by following a deeper chain inside handleUseCountDecrement(), though I'm not sure that would really be worthwhile. I'm fine with landing this as-is for now.

In any case, it would be good to add a comment to the top of the test to explain why there is the no-verify-fixpoint.

Kk

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it's the creation of a new one-use value. I added a comment explaining.
I think handling this in handleUseCountDecrement() would require fairly deep changes. I doubt it's worth it, especially since we run InstCombine multiple times in the real pipeline.

; CHECK-NEXT: [[OR1:%.*]] = or i1 [[CMP]], [[CMP_1]]
; CHECK-NEXT: ret i1 [[OR1]]
;
Expand Down Expand Up @@ -955,7 +955,7 @@ define i1 @icmp_or_xor_with_sub_3_6(i64 %x1, i64 %y1, i64 %x2, i64 %y2, i64 %x3,

define i1 @or_disjoint_with_constants(i8 %x) {
; CHECK-LABEL: @or_disjoint_with_constants(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP1:%.*]], 18
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 18
; CHECK-NEXT: ret i1 [[CMP]]
;
%or = or disjoint i8 %x, 1
Expand All @@ -966,8 +966,8 @@ define i1 @or_disjoint_with_constants(i8 %x) {

define i1 @or_disjoint_with_constants2(i8 %x) {
; CHECK-LABEL: @or_disjoint_with_constants2(
; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[TMP1:%.*]], 5
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP1]], 66
; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], 5
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X]], 66
; CHECK-NEXT: call void @use(i8 [[OR]])
; CHECK-NEXT: ret i1 [[CMP]]
;
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/InstCombine/prevent-cmp-merge.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LHS:%.*]], 15
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[LHS]], [[RHS:%.*]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP1]], 5
; CHECK-NEXT: [[SEL:%.*]] = or i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT: ret i1 [[SEL]]
;
Expand All @@ -23,9 +23,9 @@ define zeroext i1 @test1(i32 %lhs, i32 %rhs) {

define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: @test1_logical(
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], 5
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 10
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], [[RHS:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LHS:%.*]], 15
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[LHS]], [[RHS:%.*]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP1]], 5
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i1 true, i1 [[CMP2]]
; CHECK-NEXT: ret i1 [[SEL]]
;
Expand All @@ -40,7 +40,7 @@ define zeroext i1 @test1_logical(i32 %lhs, i32 %rhs) {
define zeroext i1 @test2(i32 %lhs, i32 %rhs) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[LHS:%.*]], [[RHS:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[XOR]], 0
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[LHS]], [[RHS]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[XOR]], 32
; CHECK-NEXT: [[SEL:%.*]] = xor i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT: ret i1 [[SEL]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4547,7 +4547,7 @@ define i32 @src_no_trans_select_xor_eq0_xor_or(i32 %x, i32 %y) {
define i32 @src_no_trans_select_xor_eq0_and_xor(i32 %x, i32 %y) {
; CHECK-LABEL: @src_no_trans_select_xor_eq0_and_xor(
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], 0
; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[X]], [[Y]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[AND]], i32 [[XOR]]
; CHECK-NEXT: ret i32 [[COND]]
Expand All @@ -4563,7 +4563,7 @@ define i32 @src_no_trans_select_xor_eq0_and_xor(i32 %x, i32 %y) {
define i32 @src_no_trans_select_xor_eq0_or_xor(i32 %x, i32 %y) {
; CHECK-LABEL: @src_no_trans_select_xor_eq0_or_xor(
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], 0
; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[X]], [[Y]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[OR]], i32 [[XOR]]
; CHECK-NEXT: ret i32 [[COND]]
Expand Down
Loading