Skip to content

Commit 345d7b1

Browse files
authored
[InstCombine] Fold minmax intrinsic using KnownBits information (#76242)
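This patch tries to fold min/max intrinsics that have a constant RHS by using `computeConstantRangeIncludingKnownBits`: when the combined constant-range and known-bits information for the LHS proves that the comparison against the constant always goes the same way, the intrinsic call is replaced by either the LHS or the constant. Fixes a regression in [_karatsuba_rec:cpython/Modules/_decimal/libmpdec/mpdecimal.c](https://github.com/python/cpython/blob/c31943af16f885c8cf5d5a690c25c366afdb2862/Modules/_decimal/libmpdec/mpdecimal.c#L5460-L5462), which was introduced by #71396. See also dtcxzyw/llvm-opt-benchmark#16 (comment). Alive2 proof for splat vectors with undef: https://alive2.llvm.org/ce/z/J8hKWd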
1 parent 9b6ea5e commit 345d7b1

File tree

4 files changed

+118
-4
lines changed

4 files changed

+118
-4
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,11 @@ ConstantRange computeConstantRange(const Value *V, bool ForSigned,
863863
const DominatorTree *DT = nullptr,
864864
unsigned Depth = 0);
865865

866+
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
867+
ConstantRange
868+
computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
869+
bool ForSigned, const SimplifyQuery &SQ);
870+
866871
/// Return true if this function can prove that the instruction I will
867872
/// always transfer execution to one of its successors (including the next
868873
/// instruction that follows within a basic block). E.g. this is not

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6289,10 +6289,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
62896289
}
62906290

62916291
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
6292-
static ConstantRange
6293-
computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
6294-
bool ForSigned,
6295-
const SimplifyQuery &SQ) {
6292+
ConstantRange
6293+
llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
6294+
bool ForSigned,
6295+
const SimplifyQuery &SQ) {
62966296
ConstantRange CR1 =
62976297
ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
62986298
ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,6 +1796,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
17961796
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
17971797
return NewMinMax;
17981798

1799+
// Try to fold minmax with constant RHS based on range information
1800+
const APInt *RHSC;
1801+
if (match(I1, m_APIntAllowUndef(RHSC))) {
1802+
ICmpInst::Predicate Pred =
1803+
ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
1804+
bool IsSigned = MinMaxIntrinsic::isSigned(IID);
1805+
ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
1806+
I0, IsSigned, SQ.getWithInstruction(II));
1807+
if (!LHS_CR.isFullSet()) {
1808+
if (LHS_CR.icmp(Pred, *RHSC))
1809+
return replaceInstUsesWith(*II, I0);
1810+
if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
1811+
return replaceInstUsesWith(*II,
1812+
ConstantInt::get(II->getType(), *RHSC));
1813+
}
1814+
}
1815+
17991816
break;
18001817
}
18011818
case Intrinsic::bitreverse: {

llvm/test/Transforms/InstCombine/minmax-intrinsics.ll

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,3 +2489,95 @@ define i1 @PR57986() {
24892489
%umin = call i1 @llvm.umin.i1(i1 ptrtoint (ptr @g to i1), i1 true)
24902490
ret i1 %umin
24912491
}
2492+
2493+
define i8 @fold_umax_with_knownbits_info(i8 %a, i8 %b) {
2494+
; CHECK-LABEL: @fold_umax_with_knownbits_info(
2495+
; CHECK-NEXT: entry:
2496+
; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
2497+
; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
2498+
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
2499+
; CHECK-NEXT: ret i8 [[SUB]]
2500+
;
2501+
entry:
2502+
%a1 = or i8 %a, 1
2503+
%a2 = shl i8 %b, 1
2504+
%sub = sub i8 %a1, %a2
2505+
%val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
2506+
ret i8 %val
2507+
}
2508+
2509+
define <3 x i8> @fold_umax_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) {
2510+
; CHECK-LABEL: @fold_umax_with_knownbits_info_undef_in_splat(
2511+
; CHECK-NEXT: entry:
2512+
; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], <i8 1, i8 1, i8 1>
2513+
; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], <i8 1, i8 1, i8 1>
2514+
; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i8> [[A1]], [[A2]]
2515+
; CHECK-NEXT: ret <3 x i8> [[SUB]]
2516+
;
2517+
entry:
2518+
%a1 = or <3 x i8> %a, <i8 1, i8 1, i8 1>
2519+
%a2 = shl <3 x i8> %b, <i8 1, i8 1, i8 1>
2520+
%sub = sub <3 x i8> %a1, %a2
2521+
%val = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %sub, <3 x i8> <i8 1, i8 undef, i8 1>)
2522+
ret <3 x i8> %val
2523+
}
2524+
2525+
define i8 @fold_umin_with_knownbits_info(i8 %a, i8 %b) {
2526+
; CHECK-LABEL: @fold_umin_with_knownbits_info(
2527+
; CHECK-NEXT: entry:
2528+
; CHECK-NEXT: ret i8 3
2529+
;
2530+
entry:
2531+
%a1 = or i8 %a, 3
2532+
%a2 = shl i8 %b, 2
2533+
%sub = sub i8 %a1, %a2
2534+
%val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
2535+
ret i8 %val
2536+
}
2537+
2538+
define <3 x i8> @fold_umin_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) {
2539+
; CHECK-LABEL: @fold_umin_with_knownbits_info_undef_in_splat(
2540+
; CHECK-NEXT: entry:
2541+
; CHECK-NEXT: ret <3 x i8> <i8 3, i8 3, i8 3>
2542+
;
2543+
entry:
2544+
%a1 = or <3 x i8> %a, <i8 3, i8 3, i8 3>
2545+
%a2 = shl <3 x i8> %b, <i8 2, i8 2, i8 2>
2546+
%sub = sub <3 x i8> %a1, %a2
2547+
%val = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sub, <3 x i8> <i8 3, i8 undef, i8 3>)
2548+
ret <3 x i8> %val
2549+
}
2550+
2551+
define i8 @fold_umax_with_knownbits_info_fail(i8 %a, i8 %b) {
2552+
; CHECK-LABEL: @fold_umax_with_knownbits_info_fail(
2553+
; CHECK-NEXT: entry:
2554+
; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 2
2555+
; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 1
2556+
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
2557+
; CHECK-NEXT: [[VAL:%.*]] = call i8 @llvm.umax.i8(i8 [[SUB]], i8 1)
2558+
; CHECK-NEXT: ret i8 [[VAL]]
2559+
;
2560+
entry:
2561+
%a1 = or i8 %a, 2
2562+
%a2 = shl i8 %b, 1
2563+
%sub = sub i8 %a1, %a2
2564+
%val = call i8 @llvm.umax.i8(i8 %sub, i8 1)
2565+
ret i8 %val
2566+
}
2567+
2568+
define i8 @fold_umin_with_knownbits_info_fail(i8 %a, i8 %b) {
2569+
; CHECK-LABEL: @fold_umin_with_knownbits_info_fail(
2570+
; CHECK-NEXT: entry:
2571+
; CHECK-NEXT: [[A1:%.*]] = or i8 [[A:%.*]], 1
2572+
; CHECK-NEXT: [[A2:%.*]] = shl i8 [[B:%.*]], 2
2573+
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[A1]], [[A2]]
2574+
; CHECK-NEXT: [[VAL:%.*]] = call i8 @llvm.umin.i8(i8 [[SUB]], i8 3)
2575+
; CHECK-NEXT: ret i8 [[VAL]]
2576+
;
2577+
entry:
2578+
%a1 = or i8 %a, 1
2579+
%a2 = shl i8 %b, 2
2580+
%sub = sub i8 %a1, %a2
2581+
%val = call i8 @llvm.umin.i8(i8 %sub, i8 3)
2582+
ret i8 %val
2583+
}

0 commit comments

Comments
 (0)