-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Try optimizing with knownbits which determined from Cond #91762
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: hanbeom (ParkHanbum) ChangesThis PR was submitted to implement the optimizations discussed in #73362. Q. Patch is 24.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91762.diff 6 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 571e44cdac265..0bf8c4ce0faa7 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -94,6 +94,10 @@ void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known);
void computeKnownBitsFromContext(const Value *V, KnownBits &Known,
unsigned Depth, const SimplifyQuery &Q);
+void computeKnownBitsFromCond(const Value *V, Value *Cond, KnownBits &Known,
+ unsigned Depth, const SimplifyQuery &SQ,
+ bool Invert);
+
/// Using KnownBits LHS/RHS produce the known bits for logic op (and/xor/or).
KnownBits analyzeKnownBitsFromAndXorOr(const Operator *I,
const KnownBits &KnownLHS,
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index ea1f4fc3b85dc..e654eeb1b234a 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -438,6 +438,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
return llvm::computeKnownBits(V, Depth, SQ.getWithInstruction(CxtI));
}
+ void computeKnownBitsFromCond(const Value *V, ICmpInst *Cmp, KnownBits &Known,
+ unsigned Depth, const Instruction *CxtI,
+ bool Invert) const {
+ llvm::computeKnownBitsFromCond(V, Cmp, Known, Depth,
+ SQ.getWithInstruction(CxtI), Invert);
+ }
+
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
unsigned Depth = 0,
const Instruction *CxtI = nullptr) {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index de38eddaa98fe..dba9b394ff165 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -735,9 +735,9 @@ static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
}
-static void computeKnownBitsFromCond(const Value *V, Value *Cond,
- KnownBits &Known, unsigned Depth,
- const SimplifyQuery &SQ, bool Invert) {
+void llvm::computeKnownBitsFromCond(const Value *V, Value *Cond,
+ KnownBits &Known, unsigned Depth,
+ const SimplifyQuery &SQ, bool Invert) {
Value *A, *B;
if (Depth < MaxAnalysisRecursionDepth &&
match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 117eb7a1dcc93..ef9931f440d0d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1790,6 +1790,160 @@ static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,
return nullptr;
}
+// ICmpInst of SelectInst is not included in the calculation of KnownBits
+// so we are missing the opportunity to optimize the Value of the True or
+// False Condition via ICmpInst with KnownBits.
+//
+// Consider:
+// %or = or i32 %x, %y
+// %or0 = icmp eq i32 %or, 0
+// %and = and i32 %x, %y
+// %cond = select i1 %or0, i32 %and, i32 %or
+// ret i32 %cond
+//
+// Expect:
+// %or = or i32 %x, %y
+// ret i32 %or
+//
+// We could know what bit was enabled for %x, %y by ICmpInst in SelectInst.
+static Instruction *foldSelectICmpBinOp(SelectInst &SI, ICmpInst *ICI,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TVal, Value *FVal,
+ InstCombinerImpl &IC) {
+ Value *X, *Y;
+ const APInt *C;
+
+ if (!((match(CmpLHS, m_BinOp(m_Value(X), m_Value(Y))) &&
+ match(CmpRHS, m_APInt(C))) &&
+ (match(TVal, m_c_BinOp(m_Specific(X), m_Value())) ||
+ match(TVal, m_c_BinOp(m_Specific(Y), m_Value())))))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "ENTER: ";CmpLHS->dump();CmpRHS->dump();TVal->dump(););
+ enum SpecialKnownBits {
+ NOTHING_SPECIAL = 0,
+ NO_COMMON_BITS = 1 << 1,
+ ALL_COMMON_BITS = 1 << 2,
+ ALL_BITS_ENABLED = 1 << 3,
+ };
+
+ // We cannot know exactly what bits is known in X Y.
+ // Instead, we just know what relationship exist for.
+ auto isSpecialKnownBitsFor = [&](const Instruction *CmpLHS,
+ const APInt *CmpRHS) -> unsigned {
+ unsigned Opc = CmpLHS->getOpcode();
+ if (Opc == Instruction::And) {
+ if (CmpRHS->isZero())
+ return NO_COMMON_BITS;
+ } else if (Opc == Instruction::Xor) {
+ if (CmpRHS->isAllOnes())
+ return NO_COMMON_BITS | ALL_BITS_ENABLED;
+ if (CmpRHS->isZero())
+ return ALL_COMMON_BITS;
+ }
+
+ return NOTHING_SPECIAL;
+ };
+
+ auto hasOperandAt = [&](Instruction *I, Value *Op) -> int {
+ for (unsigned Idx = 0; Idx < I->getNumOperands(); Idx++) {
+ if (I->getOperand(Idx) == Op)
+ return Idx + 1;
+ }
+ return 0;
+ };
+
+ Type *TValTy = TVal->getType();
+ unsigned BitWidth = TVal->getType()->getScalarSizeInBits();
+ auto TValBop = cast<BinaryOperator>(TVal);
+ auto CmpLHSBop = cast<BinaryOperator>(CmpLHS);
+ unsigned XOrder = hasOperandAt(TValBop, X);
+ unsigned YOrder = hasOperandAt(TValBop, Y);
+ unsigned SKB = isSpecialKnownBitsFor(CmpLHSBop, C);
+
+ KnownBits Known;
+ if (TValBop->isBitwiseLogicOp()) {
+ if (SKB != SpecialKnownBits::NOTHING_SPECIAL && XOrder && YOrder) {
+ if (SKB & SpecialKnownBits::NO_COMMON_BITS) {
+ if (SKB & (SpecialKnownBits::ALL_BITS_ENABLED)) {
+ if (TValBop->getOpcode() == Instruction::Xor)
+ Known = KnownBits::makeConstant(APInt(BitWidth, -1));
+ }
+ if (TValBop->getOpcode() == Instruction::And)
+ Known = KnownBits::makeConstant(APInt(BitWidth, 0));
+ else if ((match(TVal, m_c_Or(m_Specific(X), m_Specific(Y))) &&
+ match(FVal, m_c_Xor(m_Specific(X), m_Specific(Y)))) ||
+ (match(TVal, m_c_Xor(m_Specific(X), m_Specific(Y))) &&
+ match(FVal, m_c_Or(m_Specific(X), m_Specific(Y)))))
+ return IC.replaceInstUsesWith(SI, FVal);
+ } else if (SKB & SpecialKnownBits::ALL_COMMON_BITS) {
+ if (TValBop->getOpcode() == Instruction::And ||
+ TValBop->getOpcode() == Instruction::Or)
+ if (TValBop->hasOneUse())
+ return IC.replaceOperand(SI, 1, X);
+ } else if (SKB & SpecialKnownBits::ALL_BITS_ENABLED) {
+ if (TValBop->getOpcode() == Instruction::Or)
+ Known = KnownBits::makeConstant(APInt(BitWidth, -1));
+ }
+ } else {
+ KnownBits XKnown, YKnown, Temp;
+ KnownBits TValBop0KB, TValBop1KB;
+ XKnown = IC.computeKnownBits(X, 0, &SI);
+ IC.computeKnownBitsFromCond(X, ICI, XKnown, 0, &SI, false);
+ YKnown = IC.computeKnownBits(Y, 0, &SI);
+ IC.computeKnownBitsFromCond(Y, ICI, YKnown, 0, &SI, false);
+
+ // Estimate additional KnownBits from the relationship between X and Y
+ CmpInst::Predicate Pred = ICI->getPredicate();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (CmpLHSBop->getOpcode() == Instruction::And) {
+ XKnown.Zero |= ~*C & YKnown.One;
+ YKnown.Zero |= ~*C & XKnown.One;
+ }
+ if (CmpLHSBop->getOpcode() == Instruction::Or) {
+ XKnown.One |= *C & YKnown.Zero;
+ YKnown.One |= *C & XKnown.Zero;
+ }
+ if (CmpLHSBop->getOpcode() == Instruction::Xor) {
+ XKnown.One |= *C & YKnown.Zero;
+ XKnown.Zero |= *C & YKnown.One;
+ YKnown.One |= *C & XKnown.Zero;
+ YKnown.Zero |= *C & XKnown.One;
+ XKnown.Zero |= ~*C & YKnown.Zero;
+ XKnown.One |= ~*C & YKnown.One;
+ YKnown.Zero |= ~*C & XKnown.Zero;
+ YKnown.One |= ~*C & XKnown.One;
+ }
+ }
+
+ auto getTValBopKB = [&](unsigned OpNum) -> KnownBits {
+ unsigned Order = OpNum + 1;
+ if (Order == XOrder)
+ return XKnown;
+ else if (Order == YOrder)
+ return YKnown;
+
+ Value *V = TValBop->getOperand(OpNum);
+ KnownBits Known = IC.computeKnownBits(V, 0, &SI);
+ IC.computeKnownBitsFromCond(V, ICI, Known, 0, &SI, false);
+ return Known;
+ };
+ TValBop0KB = getTValBopKB(0);
+ TValBop1KB = getTValBopKB(1);
+ Known = analyzeKnownBitsFromAndXorOr(
+ cast<Operator>(TValBop), TValBop0KB, TValBop1KB, 0,
+ IC.getSimplifyQuery().getWithInstruction(&SI));
+ }
+ }
+
+ if (Known.isConstant()) {
+ auto Const = ConstantInt::get(TValTy, Known.getConstant());
+ return IC.replaceOperand(SI, 1, Const);
+ }
+
+ return nullptr;
+}
+
/// Visit a SelectInst that has an ICmpInst as its first operand.
Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
ICmpInst *ICI) {
@@ -1932,6 +2086,10 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldAbsDiff(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
+ if (Instruction *NewSel = foldSelectICmpBinOp(SI, ICI, CmpLHS, CmpRHS,
+ TrueVal, FalseVal, *this))
+ return NewSel;
+
return Changed ? &SI : nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
index e3eb76de459e2..ab2b93252f0d6 100644
--- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll
+++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
@@ -3,7 +3,200 @@
; https://bugs.llvm.org/show_bug.cgi?id=36950
-; These all should be just and+icmp, there should be no select.
+
+
+; ====================== AND =======================
+define i8 @src_and_bit(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_and_bit(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[AND]], 2
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8 2, i8 1
+; CHECK-NEXT: ret i8 [[COND]]
+;
+ %and = and i8 %x, 3
+ %and1 = and i8 %x, 2
+ %and2 = and i8 %and, %x
+ %cmp = icmp eq i8 %and2, 2
+ %cond = select i1 %cmp, i8 %and1, i8 1
+ ret i8 %cond
+}
+define <2 x i8> @src_and_bit_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @src_and_bit_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[AND]], <i8 2, i8 2>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 2>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %x, <i8 3, i8 3>
+ %and1 = and <2 x i8> %x, <i8 2, i8 2>
+ %and2 = and <2 x i8> %and, %x
+ %cmp = icmp eq <2 x i8> %and2, <i8 2, i8 2>
+ %cond = select <2 x i1> %cmp, <2 x i8> %and1, <2 x i8><i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+define <2 x i8> @src_and_bit_vec_poison(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @src_and_bit_vec_poison(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 poison, i8 3>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[AND]], <i8 2, i8 2>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 2>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %x, <i8 poison, i8 3>
+ %and1 = and <2 x i8> %x, <i8 poison, i8 2>
+ %and2 = and <2 x i8> %and, %x
+ %cmp = icmp eq <2 x i8> %and2, <i8 2, i8 2>
+ %cond = select <2 x i1> %cmp, <2 x i8> %and1, <2 x i8><i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+define <2 x i8> @src_and_bit_vec_poison2(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @src_and_bit_vec_poison2(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 poison, i8 3>
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[AND]], <i8 2, i8 2>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 2>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %x, <i8 poison, i8 3>
+ %and1 = and <2 x i8> %x, <i8 poison, i8 2>
+ %and2 = and <2 x i8> %and, %x
+ %cmp = icmp eq <2 x i8> %and2, <i8 2, i8 2>
+ %cond = select <2 x i1> %cmp, <2 x i8> %and1, <2 x i8><i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+
+; ====================== OR =======================
+define i8 @src_or_bit(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @src_or_bit(
+; CHECK-NEXT: [[AND1:%.*]] = shl i8 [[Y:%.*]], 2
+; CHECK-NEXT: [[SHL:%.*]] = and i8 [[AND1]], 12
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[SHL]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8 3, i8 1
+; CHECK-NEXT: ret i8 [[COND]]
+;
+ %and = and i8 %z, 3
+ %and1 = shl i8 %y, 2
+ %shl = and i8 %and1, 12
+ %or = or i8 %shl, %x
+ %cmp = icmp eq i8 %or, 3
+ %or2 = or i8 %and, %x
+ %cond = select i1 %cmp, i8 %or2, i8 1
+ ret i8 %cond
+}
+define <2 x i8> @src_or_bit_vec(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @src_or_bit_vec(
+; CHECK-NEXT: [[AND1:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 2, i8 2>
+; CHECK-NEXT: [[SHL:%.*]] = and <2 x i8> [[AND1]], <i8 12, i8 12>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[SHL]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[OR]], <i8 3, i8 3>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 3, i8 3>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %z, <i8 3, i8 3>
+ %and1 = shl <2 x i8> %y, <i8 2, i8 2>
+ %shl = and <2 x i8> %and1, <i8 12, i8 12>
+ %or = or <2 x i8> %shl, %x
+ %cmp = icmp eq <2 x i8> %or, <i8 3, i8 3>
+ %or2 = or <2 x i8> %and, %x
+ %cond = select <2x i1> %cmp, <2 x i8> %or2, <2 x i8> <i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+define <2 x i8> @src_or_bit_vec_poison(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @src_or_bit_vec_poison(
+; CHECK-NEXT: [[AND1:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 2, i8 poison>
+; CHECK-NEXT: [[SHL:%.*]] = and <2 x i8> [[AND1]], <i8 12, i8 poison>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[SHL]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[OR]], <i8 3, i8 3>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 3, i8 3>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %z, <i8 3, i8 poison>
+ %and1 = shl <2 x i8> %y, <i8 2, i8 poison>
+ %shl = and <2 x i8> %and1, <i8 12, i8 poison>
+ %or = or <2 x i8> %shl, %x
+ %cmp = icmp eq <2 x i8> %or, <i8 3, i8 3>
+ %or2 = or <2 x i8> %and, %x
+ %cond = select <2 x i1> %cmp, <2 x i8> %or2, <2 x i8> <i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+define <2 x i8> @src_or_bit_vec_poison2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @src_or_bit_vec_poison2(
+; CHECK-NEXT: [[AND1:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 poison, i8 2>
+; CHECK-NEXT: [[SHL:%.*]] = and <2 x i8> [[AND1]], <i8 poison, i8 12>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[SHL]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[OR]], <i8 3, i8 3>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 3, i8 3>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %z, <i8 poison, i8 3>
+ %and1 = shl <2 x i8> %y, <i8 poison, i8 2>
+ %shl = and <2 x i8> %and1, <i8 poison, i8 12>
+ %or = or <2 x i8> %shl, %x
+ %cmp = icmp eq <2 x i8> %or, <i8 3, i8 3>
+ %or2 = or <2 x i8> %and, %x
+ %cond = select <2 x i1> %cmp, <2 x i8> %or2, <2 x i8> <i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+
+define i8 @src_xor_bit(i8 %x, i8 %y) {
+; CHECK-LABEL: @src_xor_bit(
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y:%.*]], 12
+; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XOR]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8 3, i8 1
+; CHECK-NEXT: ret i8 [[COND]]
+;
+ %and = and i8 %y, 12
+ %xor = xor i8 %and, %x
+ %cmp = icmp eq i8 %xor, 3
+ %and1 = and i8 %x, 3
+ %cond = select i1 %cmp, i8 %and1, i8 1
+ ret i8 %cond
+}
+define <2 x i8> @src_xor_bit_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @src_xor_bit_vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y:%.*]], <i8 12, i8 12>
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[AND]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[XOR]], <i8 3, i8 3>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 3, i8 3>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %y, <i8 12, i8 12>
+ %xor = xor <2 x i8> %and, %x
+ %cmp = icmp eq <2 x i8> %xor, <i8 3, i8 3>
+ %and1 = and <2 x i8> %x, <i8 3, i8 3>
+ %cond = select <2 x i1> %cmp, <2 x i8> %and1, <2 x i8> <i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+define <2 x i8> @src_xor_bit_vec_poison(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @src_xor_bit_vec_poison(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y:%.*]], <i8 poison, i8 12>
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[AND]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[XOR]], <i8 3, i8 3>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 3, i8 3>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %y, <i8 poison, i8 12>
+ %xor = xor <2 x i8> %and, %x
+ %cmp = icmp eq <2 x i8> %xor, <i8 3, i8 3>
+ %and1 = and <2 x i8> %x, <i8 poison, i8 3>
+ %cond = select <2 x i1> %cmp, <2 x i8> %and1, <2 x i8> <i8 1, i8 1>
+ ret <2 x i8> %cond
+}
+define <2 x i8> @src_xor_bit_vec_poison2(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @src_xor_bit_vec_poison2(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[Y:%.*]], <i8 poison, i8 12>
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i8> [[AND]], [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[XOR]], <i8 3, i8 3>
+; CHECK-NEXT: [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 3, i8 3>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i8> [[COND]]
+;
+ %and = and <2 x i8> %y, <i8 poison, i8 12>
+ %xor = xor <2 x i8> %and, %x
+ %cmp = icmp eq <2 x i8> %xor, <i8 3, i8 3>
+ %and1 = and <2 x i8> %x, <i8 3, i8 3>
+ %cond = select <2 x i1> %cmp, <2 x i8> %and1, <2 x i8> <i8 1, i8 1>
+ ret <2 x i8> %cond
+}
define i32 @and_lshr_and(i32 %arg) {
; CHECK-LABEL: @and_lshr_and(
@@ -590,13 +783,13 @@ define i32 @n5(i32 %arg) {
; CHECK-LABEL: @n5(
; CHECK-NEXT: [[T:%.*]] = and i32 [[ARG:%.*]], 2
; CHECK-NEXT: [[T1:%.*]] = icmp eq i32 [[T]], 0
-; CHECK-NEXT: [[T2:%.*]] = and i32 [[ARG]], 2
+; CHECK-NEXT: [[T2:%.*]] = and i32 [[ARG]], 3
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T1]], i32 [[T2]], i32 1
; CHECK-NEXT: ret i32 [[T3]]
;
%t = and i32 %arg, 2
%t1 = icmp eq i32 %t, 0
- %t2 = and i32 %arg, 2 ; 2 instead of 1
+ %t2 = and i32 %arg, 3 ; 3 instead of 2
%t3 = select i1 %t1, i32 %t2, i32 1
ret i32 %t3
}
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 2efe2742ca491..e32cb17d85922 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -3701,12 +3701,8 @@ exit:
define i32 @src_and_eq_0_or_xor(i32 %x, i32 %y) {
; CHECK-LABEL: @src_and_eq_0_or_xor(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]]
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[XOR]]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[XOR]]
;
entry:
%and = and i32 %y, %x
@@ -3721,12 +3717,8 @@ entry:
define i32 @src_and_eq_0_xor_or(i32 %x, i32 %y) {
; CHECK-LABEL: @src_and_eq_0_xor_or(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[OR]]
-; CHECK-NEXT: ret i32 [[COND]]
+; CH...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@nikic Can you check compile-time impact? |
d317b29
to
7e58012
Compare
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T1]], i32 [[T2]], i32 1 | ||
; CHECK-NEXT: ret i32 [[T3]] | ||
; | ||
%t = and i32 %arg, 2 | ||
%t1 = icmp eq i32 %t, 0 | ||
%t2 = and i32 %arg, 2 ; 2 instead of 1 | ||
%t2 = and i32 %arg, 3 ; 3 instead of 2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"%t2 = and i32 %arg, 2" has same knownbits with %t. and because it be in trueval, %t will become 0. therefore %t2 also become 0.
these n series tests are written to keep false cases for AND bit test. so, it was miss written IMO.
if (!((match(CmpLHS, m_BinOp(m_Value(X), m_Value(Y))) && | ||
match(CmpRHS, m_APInt(C))) && | ||
(match(TVal, m_c_BinOp(m_Specific(X), m_Value())) || | ||
match(TVal, m_c_BinOp(m_Specific(Y), m_Value()))))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing support for swapped operands.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to be honest, I didn't understand what you mean. 😭
I would appreciate it if you could explain it in more detail.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider the following example: https://alive2.llvm.org/ce/z/f579pe
define i32 @src(i32 %x, i32 %y) {
%or = or i32 %x, %y
%or0 = icmp ne i32 %or, 0
%and = and i32 %x, %y
%cond = select i1 %or0, i32 %or, i32 %and
ret i32 %cond
}
define i32 @tgt(i32 %x, i32 %y) {
%or = or i32 %x, %y
ret i32 %or
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thanks for explain! works done.
return nullptr; | ||
|
||
enum SpecialKnownBits { | ||
NOTHING_SPECIAL = 0, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
@@ -1790,6 +1790,159 @@ static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI, | |||
return nullptr; | |||
} | |||
|
|||
// ICmpInst of SelectInst is not included in the calculation of KnownBits | |||
// so we are missing the opportunity to optimize the Value of the True or | |||
// False Condition via ICmpInst with KnownBits. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO, same way to use computeKnownBitsFromCond, but computeKnownBitsFromOperator would compute KnownBits of SelectInst. I use Cond from SelectInst to estimate the KnownBits of the variables that make up Cond. The key is that if we can use it to estimate the KnownBits of Trueval, there is a difference in performing the optimization.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh I see what you mean. This seems very similar to #88298, I think generally this code would fit better as an extension to foldSelectValueEquivilence.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still feel like the code fits better in foldSelectValueEquivilence
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@goldsteinn
I changed it to call inside foldSelectValueEquivalence.
The order is changed so that the optimizations I added are performed first, preventing the previously performed optimizations from being performed. If this happens, should I change the order of the other optimization?
7e58012
to
f0e8dd6
Compare
a31dac3
to
1d51fbd
Compare
return NoCommonBits | AllBitsEnabled; | ||
if (CmpRHS->isZero()) | ||
return AllCommonBits; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or disjoint
you can do NoCommonBits
and with isAllOnes
you can do AllBitsEnabled
.
unsigned SKB = isSpecialKnownBitsFor(CmpLHSBop, C); | ||
|
||
KnownBits Known; | ||
if (TValBop->isBitwiseLogicOp()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since you only support BitWise, can you just early return if not bitwise to reduce the level of nested ifs.
cast<Operator>(TValBop), TValBop0KB, TValBop1KB, 0, | ||
IC.getSimplifyQuery().getWithInstruction(&SI)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a desperate need for comments in the above code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll update soon!
XKnown.Zero |= ~*C & YKnown.Zero; | ||
XKnown.One |= ~*C & YKnown.One; | ||
YKnown.Zero |= ~*C & XKnown.Zero; | ||
YKnown.One |= ~*C & XKnown.One; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I really don't understand what you are going for here...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
when we handle selectInst which have cond as x ^ y == c
, we could discovering knownbits from cond.
explain,
a. XKnown.One |= *C & YKnown.Zero;
if same bits of C and Y.Zero was enabled, then X.One must be enabled. X = 1, Y = 0, C = 1. X ^ Y == C
b. XKnown.Zero |= *C & YKnown.One;
likely a. if same bits of C and Y.One was enabled, then same bit of X must be 0.
after we discovering it, we can use it usefully to trueval.
this is one of test which contains in pull-request.
define i8 @src_xor_bit(i8 %x, i8 %y) {
; CHECK-LABEL: @src_xor_bit(
; CHECK-NEXT: [[AND:%.*]] = and i8 [[Y:%.*]], 12
; CHECK-NEXT: [[XOR:%.*]] = xor i8 [[AND]], [[X:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XOR]], 3
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8 3, i8 1
; CHECK-NEXT: ret i8 [[COND]]
;
%and = and i8 %y, 12
%xor = xor i8 %and, %x
%cmp = icmp eq i8 %xor, 3
%and1 = and i8 %x, 3
%cond = select i1 %cmp, i8 %and1, i8 1
ret i8 %cond
}
we have cond as (Y & 12) ^ X == 3
in selectinst.
we could discover knownbits of X following steps:
(Y & 12)'s KnownBits = 0000??00(1) 1111??11(0)
(Y & 12) ^ X == 3 (in trueval)
X.One = 00000011(3) & 1111??11(Y&12)
X.Zero = 00000011(3) & 0000??00(Y&12)
so, We can treat X as 3 in trueval of this selectinst ,and it can possible replacing %and1
to 3 as you can see.
358bee6
to
21fa11d
Compare
it passed test-suite. 😆 Testing Time: 277.82s Total Discovered Tests: 2438 |
Tests that begin with n in select-of-bittest are negative tests. However, n5 test is different from the intent of the test because bits in %t and %t2 are the same. This is probably a mistake, so corrected it. Also added tests for cases where the and/or/xor might be optimized if it is placed in the cond and truval of selectInst. proof : https://alive2.llvm.org/ce/z/KJbbbp
to use `Cond` in `SelectInst` for compute KnownBits.
21fa11d
to
0d64c0c
Compare
3e190e7
to
0d64c0c
Compare
…ectInst Cond ICmpInst of SelectInst is not included in the calculation of KnownBits so we are missing the opportunity to optimize the Value of the True or False Condition via ICmpInst with KnownBits. Consider: %or = or i32 %x, %y %or0 = icmp eq i32 %or, 0 %and = and i32 %x, %y %cond = select i1 %or0, i32 %and, i32 %or ret i32 %cond Expect: %or = or i32 %x, %y ret i32 %or We could know what bit was enabled for %x, %y by ICmpInst in SelectInst. This patch is an implementation that calculates the known bits over ICmp and optimizes them where possible. Proof : https://alive2.llvm.org/ce/z/CT7WFW
There are two type of change with this patch. When the Cond of SelectInst is ICmpInst. first is a special case where optimizations can be made. For example, in the case of (icmp eq (xor x, y) -1), it is impossible to estimate the bits of x and y, but the relationship between the bits of x and y is known. We define this as NoCommonBits | AllCommonBits. if (and x, y) is a trueval with this cond, then x and y are in NoCommonBits state, so (and x, y) is zero. second is that if the cond allows us to estimate the KnownBits of X or Y, then if TrueVal is a BinOp consisting of X or Y, we can optimize it. For example, if (icmp eq (or x, y) 0), then both x and y have KnownBits that can be estimated to be 0. Therefore, something like (select (icmp eq (or x, y) 0), (xor x, y), (or x, y)) can be optimized to (or x, y).
0d64c0c
to
413ab0a
Compare
is test result can be different between mypc and test-machine? |
if (auto Inst = dyn_cast<PossiblyDisjointInst>(CmpLHS)) { | ||
if (Inst->isDisjoint()) | ||
IsDisjoint = true; | ||
CmpLHSOpc = Instruction::Or; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you at the very least assert that this is correct. not an issue now, but we may just disjoint
on other instructions in the future.
if (C->isAllOnes()) | ||
return NoCommonBits | AllBitsEnabled; | ||
return NoCommonBits; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(x | y == -1)
you can do AllBitsEnabled
irrelivant of disjoint
} else if ((match(CmpLHS, m_BinOp(m_Value(X), m_Value(Y))) && | ||
match(CmpRHS, m_APInt(C))) && | ||
(match(TVal, m_c_BinOp(m_Specific(X), m_Value())) || | ||
match(TVal, m_c_BinOp(m_Specific(Y), m_Value())))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
afaict, these should only be matching bitwise logic
} else if (SKB & SpecialKnownBits::AllBitsEnabled) { | ||
// We can replace (X|Y) to -1 | ||
if (TValBop->getOpcode() == Instruction::Or) | ||
Known = KnownBits::makeConstant(APInt(BitWidth, -1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would organize this code a bit differently, instead of going through the knownbits cases and then applying rules to ops, I would switch
on TValBop->getOpcode()
then apply the knownbits cases that apply.
Think that will be easier to follow/extend.
XKnown = IC.computeKnownBits(X, 0, &SI); | ||
IC.computeKnownBitsFromCond(X, ICI, XKnown, 0, &SI, false); | ||
YKnown = IC.computeKnownBits(Y, 0, &SI); | ||
IC.computeKnownBitsFromCond(Y, ICI, YKnown, 0, &SI, false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
comment constants here and above.
static Instruction *foldSelectICmpBinOp(SelectInst &SI, ICmpInst *ICI, | ||
Value *CmpLHS, Value *CmpRHS, | ||
Value *TVal, Value *FVal, | ||
InstCombinerImpl &IC) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would think you could want to call this twice w/ TVal
/FVal
swapped and an Invert
bool
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sorry, I didn't understood what is your point. would you explain more detail for newbie?
as I know, swap of true and false need to swap compare also. so, we test "equal" compare changed to "not equal" for cases of each. and we also test exchange %x, %y.
this is added tests with this commit:
define i32 @src_or_disjoint_xor(i32 %x, i32 %y) {
; CHECK-LABEL: @src_or_disjoint_xor(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 -1
;
entry:
%or.disjoint = or disjoint i32 %x, %y
%cmp = icmp eq i32 %or.disjoint, -1
%xor = xor i32 %x, %y
%cond = select i1 %cmp, i32 %xor, i32 -1
ret i32 %cond
}
define i32 @src_or_disjoint_xor_comm(i32 %x, i32 %y) {
; CHECK-LABEL: @src_or_disjoint_xor_comm(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 -1
;
entry:
%or.disjoint = or disjoint i32 %y, %x
%cmp = icmp eq i32 -1, %or.disjoint
%xor = xor i32 %y, %x
%cond = select i1 %cmp, i32 %xor, i32 -1
ret i32 %cond
}
define i32 @src_or_disjoint_xor_ne(i32 %x, i32 %y) {
; CHECK-LABEL: @src_or_disjoint_xor_ne(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 -1
;
entry:
%or.disjoint = or disjoint i32 %x, %y
%cmp = icmp ne i32 %or.disjoint, -1
%xor = xor i32 %x, %y
%cond = select i1 %cmp, i32 -1, i32 %xor
ret i32 %cond
}
define i32 @src_or_disjoint_xor_ne_comm(i32 %x, i32 %y) {
; CHECK-LABEL: @src_or_disjoint_xor_ne_comm(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 -1
;
entry:
%or.disjoint = or disjoint i32 %y, %x
%cmp = icmp ne i32 %or.disjoint, -1
%xor = xor i32 %y, %x
%cond = select i1 %cmp, i32 -1, i32 %xor
ret i32 %cond
}
if you explain what you considering then it very helpful for me to improve knowledge of llvm.
// The bit that are set to 0 at `C&Y` must be 1 in X | ||
// The bit that are set to 0 at `C&X` must be 1 in Y | ||
XKnown.One |= *C & YKnown.Zero; | ||
YKnown.One |= *C & XKnown.Zero; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Think we should be handling the or
and and
case in computeKnownBitsFromCond
, no? Otherwise, think we should fix that there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Likewise for the xor
case tbh.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, we can't really do that w/ current infra. Although I think a better way to do this would be to create a new helper in ValueTracking for computing known x/y under the assumption of a cmp
involving x/y is true/false.
edit: The difference from existing APIs is that we have knownx/knowny
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually, these discovering way of knownbits is exist in valuetracker before but it removed more recently. but I think it can be used for this commit. so, I'm not have confidence about creating new api for this in ValueTracker.
but it is beginner's opinion, I'll update code soon.
this is why test failed in windows.
I declare ZeroVal in if-match block, but used its reference after off the block. window compiler didn't allow this, I could see garbage value was setted in
it obviouly my mistake. |
This all looks quite convoluted to me. I think the high-level approach here should basically be main...nikic:perf/select-known-bits, which can be further generalized via SimplifyDemandedBits for non-constant pattern. Main challenge is compile-time overhead, doing this is not free: http://llvm-compile-time-tracker.com/compare.php?from=534f8569a3c9fccfd5cbc5f632b63ad0cf711098&to=3e43eb32001514cddbda56dc22fadc40d557e025&stat=instructions:u The basic idea is to reuse existing KnownBits infra and inject the select condition into it. |
I'm not good enough (even close). sorry for waste time. I wonder if SpecialKnownBits related codes are acceptable. |
Close this PR because a more comprehensive patch has been merged. |
This PR was submitted to implement the optimizations discussed in #73362.
Q.
I have very little experience sending PRs, is this the right way to write commit messages for tests related to a patch?
please let me know if I'm wrong