Skip to content

Commit

Permalink
[SCEVExpander] Only create multiplication if needed.
Browse files Browse the repository at this point in the history
9345ab3 updated generateOverflowCheck to skip creating checks that
always evaluate to false. This in turn means that we only need to
compute |Step| * Trip count  if the result of the multiplication is
actually used.

Sink the multiplication into ComputeEndCheck, so it is only created
when there's an actual check.
  • Loading branch information
fhahn committed Jan 10, 2022
1 parent 3891619 commit ad1b877
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 30 deletions.
31 changes: 16 additions & 15 deletions llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2494,21 +2494,6 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);

// Compute |Step| * Backedge
Value *MulV, *OfMul;
if (Step->isOne()) {
// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
// needed, there is never an overflow, so to avoid artificially inflating
// the cost of the check, directly emit the optimized IR.
MulV = TruncTripCount;
OfMul = ConstantInt::getFalse(MulV->getContext());
} else {
auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
Intrinsic::umul_with_overflow, Ty);
CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
}

// Compute:
// 1. Start + |Step| * Backedge < Start
// 2. Start - |Step| * Backedge > Start
Expand All @@ -2521,6 +2506,22 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
return ConstantInt::getFalse(Loc->getContext());

Value *MulV, *OfMul;
if (Step->isOne()) {
// Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
// needed, there is never an overflow, so to avoid artificially inflating
// the cost of the check, directly emit the optimized IR.
MulV = TruncTripCount;
OfMul = ConstantInt::getFalse(MulV->getContext());
} else {
auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
Intrinsic::umul_with_overflow, Ty);
CallInst *Mul =
Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
}

Value *Add = nullptr, *Sub = nullptr;
bool NeedPosCheck = !SE.isKnownNegative(Step);
bool NeedNegCheck = !SE.isKnownPositive(Step);
Expand Down
14 changes: 4 additions & 10 deletions llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK: for.body.lver.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
Expand Down Expand Up @@ -88,10 +85,10 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: for.end.loopexit6:
; CHECK: for.end.loopexit3:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -153,9 +150,6 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
; CHECK: for.body.lver.check:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
Expand Down Expand Up @@ -227,10 +221,10 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END:%.*]]
; CHECK: for.end.loopexit5:
; CHECK: for.end.loopexit2:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ define void @f1(i16* noalias %a,
; LV-NEXT: [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
; LV-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
; LV-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; LV-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
; LV-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
; LV-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
; LV-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; LV-NEXT: [[TMP8:%.*]] = or i1 false, [[TMP7]]
; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
Expand Down Expand Up @@ -78,10 +75,10 @@ define void @f1(i16* noalias %a,
; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT3:%.*]], label [[FOR_BODY]]
; LV: for.end.loopexit:
; LV-NEXT: br label [[FOR_END:%.*]]
; LV: for.end.loopexit6:
; LV: for.end.loopexit3:
; LV-NEXT: br label [[FOR_END]]
; LV: for.end:
; LV-NEXT: ret void
Expand Down

0 comments on commit ad1b877

Please sign in to comment.