From b595b8c98d547915c47f99e84dcf9204409aaaae Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Fri, 18 Oct 2024 17:40:06 +0100 Subject: [PATCH 1/9] [SCEV] Collect and merge loop guards through PHI nodes with multiple incoming Values --- llvm/include/llvm/Analysis/ScalarEvolution.h | 5 ++ llvm/lib/Analysis/ScalarEvolution.cpp | 85 +++++++++++++++++-- .../Analysis/ScalarEvolution/trip-count.ll | 82 ++++++++++++++++++ .../Transforms/PhaseOrdering/X86/pr38280.ll | 2 +- 4 files changed, 164 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 179a2c38d9d3c2..cdc46cf24a0546 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1316,6 +1316,11 @@ class ScalarEvolution { LoopGuards(ScalarEvolution &SE) : SE(SE) {} + static LoopGuards + collectFromBlock(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, + const BasicBlock *Block, const BasicBlock *Pred, + SmallPtrSet VisitedBlocks); + public: /// Collect rewrite map for loop guards for loop \p L, together with flags /// indicating if NUW and NSW can be preserved during rewriting. 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c939270ed39a65..d9cab0471ef0f3 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10648,7 +10648,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) if (const Loop *L = LI.getLoopFor(BB)) return {L->getLoopPredecessor(), L->getHeader()}; - return {nullptr, nullptr}; + return {nullptr, BB}; } /// SCEV structural equivalence is usually sufficient for testing whether two @@ -15217,7 +15217,16 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, ScalarEvolution::LoopGuards ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { + BasicBlock *Header = L->getHeader(); + BasicBlock *Pred = L->getLoopPredecessor(); LoopGuards Guards(SE); + return collectFromBlock(SE, Guards, Header, Pred, {}); +} + +ScalarEvolution::LoopGuards ScalarEvolution::LoopGuards::collectFromBlock( + ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, + const BasicBlock *Block, const BasicBlock *Pred, + SmallPtrSet VisitedBlocks) { SmallVector ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, @@ -15556,14 +15565,13 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { } }; - BasicBlock *Header = L->getHeader(); SmallVector> Terms; // First, collect information from assumptions dominating the loop. 
for (auto &AssumeVH : SE.AC.assumptions()) { if (!AssumeVH) continue; auto *AssumeI = cast(AssumeVH); - if (!SE.DT.dominates(AssumeI, Header)) + if (!SE.DT.dominates(AssumeI, Block)) continue; Terms.emplace_back(AssumeI->getOperand(0), true); } @@ -15574,8 +15582,8 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { if (GuardDecl) for (const auto *GU : GuardDecl->users()) if (const auto *Guard = dyn_cast(GU)) - if (Guard->getFunction() == Header->getParent() && - SE.DT.dominates(Guard, Header)) + if (Guard->getFunction() == Block->getParent() && + SE.DT.dominates(Guard, Block)) Terms.emplace_back(Guard->getArgOperand(0), true); // Third, collect conditions from dominating branches. Starting at the loop @@ -15583,11 +15591,10 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { // predecessors that can be found that have unique successors leading to the // original header. // TODO: share this logic with isLoopEntryGuardedByCond. - for (std::pair Pair( - L->getLoopPredecessor(), Header); - Pair.first; + std::pair Pair(Pred, Block); + for (; Pair.first; Pair = SE.getPredecessorWithUniqueSuccessorForBB(Pair.first)) { - + VisitedBlocks.insert(Pair.second); const BranchInst *LoopEntryPredicate = dyn_cast(Pair.first->getTerminator()); if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) @@ -15596,6 +15603,66 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { Terms.emplace_back(LoopEntryPredicate->getCondition(), LoopEntryPredicate->getSuccessor(0) == Pair.second); } + // Finally, if we stopped climbing the predecessor chain because + // there wasn't a unique one to continue, try to collect conditions + // for PHINodes by recursively following all of their incoming + // blocks and try to merge the found conditions to build a new one + // for the Phi. 
+ if (Pair.second->hasNPredecessorsOrMore(2)) { + for (auto &Phi : Pair.second->phis()) { + if (!SE.isSCEVable(Phi.getType())) + continue; + + using MinMaxPattern = std::pair; + auto GetMinMaxConst = [&SE, &VisitedBlocks, &Pair, + &Phi](unsigned int In) -> MinMaxPattern { + LoopGuards G(SE); + if (VisitedBlocks.insert(Phi.getIncomingBlock(In)).second) + collectFromBlock(SE, G, Pair.second, Phi.getIncomingBlock(In), + VisitedBlocks); + const SCEV *S = G.RewriteMap[SE.getSCEV(Phi.getIncomingValue(In))]; + auto *SM = dyn_cast_if_present(S); + if (!SM) + return {nullptr, scCouldNotCompute}; + if (const SCEVConstant *C0 = dyn_cast(SM->getOperand(0))) + return {C0, SM->getSCEVType()}; + if (const SCEVConstant *C1 = dyn_cast(SM->getOperand(1))) + return {C1, SM->getSCEVType()}; + return {nullptr, scCouldNotCompute}; + }; + auto MergeMinMaxConst = [](MinMaxPattern P1, + MinMaxPattern P2) -> MinMaxPattern { + auto [C1, T1] = P1; + auto [C2, T2] = P2; + if (!C1 || !C2 || T1 != T2) + return {nullptr, scCouldNotCompute}; + switch (T1) { + case scUMaxExpr: + return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1}; + case scSMaxExpr: + return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1}; + case scUMinExpr: + return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1}; + case scSMinExpr: + return {C1->getAPInt().sgt(C2->getAPInt()) ? C1 : C2, T1}; + default: + llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs."); + } + }; + auto P = GetMinMaxConst(0); + for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) { + if (!P.first) + break; + P = MergeMinMaxConst(P, GetMinMaxConst(In)); + } + if (P.first) { + const SCEV *LHS = SE.getSCEV(const_cast(&Phi)); + SmallVector Ops({P.first, LHS}); + const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops); + Guards.RewriteMap.insert({LHS, RHS}); + } + } + } // Now apply the information from the collected conditions to // Guards.RewriteMap. 
Conditions are processed in reverse order, so the diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count.ll b/llvm/test/Analysis/ScalarEvolution/trip-count.ll index 8fc5b9b4096127..7304409814b0e1 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count.ll @@ -211,3 +211,85 @@ for.body: exit: ret void } + +define void @epilogue(i64 %count) { +; CHECK-LABEL: 'epilogue' +; CHECK-NEXT: Determining loop execution counts for: @epilogue +; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 6 +; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 +; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +entry: + %cmp = icmp ugt i64 %count, 7 + br i1 %cmp, label %while.body, label %epilogue.preheader + +while.body: + %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] + %sub = add i64 %iv, -8 + %exitcond.not = icmp ugt i64 %sub, 7 + br i1 %exitcond.not, label %while.body, label %while.loopexit + +while.loopexit: + %sub.exit = phi i64 [ %sub, %while.body ] + br label %epilogue.preheader + +epilogue.preheader: + %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] + %epilogue.cmp = icmp eq i64 %count.epilogue, 0 + br i1 %epilogue.cmp, label %exit, label %epilogue + +epilogue: + %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] + %dec = add i64 %iv.epilogue, -1 + %exitcond.epilogue = icmp eq i64 %dec, 0 + br i1 %exitcond.epilogue, label %exit, label %epilogue + +exit: + ret void + +} + +define void @epilogue2(i64 %count) { 
+; CHECK-LABEL: 'epilogue2' +; CHECK-NEXT: Determining loop execution counts for: @epilogue2 +; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 8 +; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 +; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +entry: + %cmp = icmp ugt i64 %count, 9 + br i1 %cmp, label %while.body, label %epilogue.preheader + +while.body: + %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] + %sub = add i64 %iv, -8 + %exitcond.not = icmp ugt i64 %sub, 7 + br i1 %exitcond.not, label %while.body, label %while.loopexit + +while.loopexit: + %sub.exit = phi i64 [ %sub, %while.body ] + br label %epilogue.preheader + +epilogue.preheader: + %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] + %epilogue.cmp = icmp eq i64 %count.epilogue, 0 + br i1 %epilogue.cmp, label %exit, label %epilogue + +epilogue: + %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] + %dec = add i64 %iv.epilogue, -1 + %exitcond.epilogue = icmp eq i64 %dec, 0 + br i1 %exitcond.epilogue, label %exit, label %epilogue + +exit: + ret void + +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll index 70b002f766b753..966d7e3cded0ab 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll @@ -41,7 +41,7 @@ define void @apply_delta(ptr nocapture noundef %dst, ptr nocapture noundef reado ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr 
[[DST_ADDR_130]], i64 1 ; CHECK-NEXT: [[INCDEC_PTR8]] = getelementptr inbounds i8, ptr [[SRC_ADDR_129]], i64 1 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[DEC]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END9]], label [[WHILE_BODY4]] ; CHECK: while.end9: ; CHECK-NEXT: ret void ; From b5d72e3097b9cd9329ba97c4c50ff9bdef65fb44 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Tue, 29 Oct 2024 14:15:38 +0000 Subject: [PATCH 2/9] fixup! [SCEV] Collect and merge loop guards through PHI nodes with multiple incoming Values --- .../Analysis/ScalarEvolution/trip-count.ll | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count.ll b/llvm/test/Analysis/ScalarEvolution/trip-count.ll index 7304409814b0e1..02b451d557513a 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count.ll @@ -293,3 +293,135 @@ exit: ret void } + +define void @slt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'slt' +; CHECK-NEXT: Determining loop execution counts for: @slt +; CHECK-NEXT: Loop %loop: backedge-taken count is (63 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (63 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp slt i16 %a, 8 + br i1 %cmp1, label %preheader, label %exit + +b2: + %cmp2 = icmp slt i16 %b, 8 + br i1 %cmp2, label %preheader, label %exit + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, 1 + %exitcond = icmp slt i16 %iv.next, 64 + br i1 %exitcond, label %loop, label %exit + +exit: + ret void + +} + +define void @ult(i16 %a, i16 %b, i1 %c) { +; 
CHECK-LABEL: 'ult' +; CHECK-NEXT: Determining loop execution counts for: @ult +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp ult i16 %a, 8 + br i1 %cmp1, label %exit, label %preheader + +b2: + %cmp2 = icmp ult i16 %b, 8 + br i1 %cmp2, label %exit, label %preheader + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, -1 + %exitcond = icmp eq i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void + +} + +define void @sgt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'sgt' +; CHECK-NEXT: Determining loop execution counts for: @sgt +; CHECK-NEXT: Loop %loop: backedge-taken count is %count +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 32767 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %count +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp sgt i16 %a, 8 + br i1 %cmp1, label %preheader, label %exit + +b2: + %cmp2 = icmp sgt i16 %b, 8 + br i1 %cmp2, label %preheader, label %exit + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, -1 + %exitcond = icmp slt i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + + +define void @mixed(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'mixed' +; CHECK-NEXT: Determining loop execution counts for: @mixed +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 +; CHECK-NEXT: 
Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp slt i16 %a, 8 + br i1 %cmp1, label %preheader, label %exit + +b2: + %cmp2 = icmp ult i16 %b, 8 + br i1 %cmp2, label %preheader, label %exit + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, 1 + %exitcond = icmp slt i16 %iv.next, 64 + br i1 %exitcond, label %loop, label %exit + +exit: + ret void + +} From 8d38196d93b84cd44b1c54d54ab99204ee831130 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Tue, 5 Nov 2024 15:19:49 +0000 Subject: [PATCH 3/9] fixup! fixup! [SCEV] Collect and merge loop guards through PHI nodes with multiple incoming Values --- llvm/include/llvm/Analysis/ScalarEvolution.h | 18 +- llvm/lib/Analysis/ScalarEvolution.cpp | 126 ++++++----- ...t-guard-info-with-multiple-predecessors.ll | 210 +++++++++++++++++ .../Analysis/ScalarEvolution/trip-count.ll | 214 ------------------ 4 files changed, 296 insertions(+), 272 deletions(-) create mode 100644 llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index cdc46cf24a0546..950ffd8649db1a 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1316,10 +1316,24 @@ class ScalarEvolution { LoopGuards(ScalarEvolution &SE) : SE(SE) {} - static LoopGuards + /// Recursively collect loop guards in \p Guards, starting from + /// block \p Block with predecessor \p Pred. The intended starting point + /// is to collect from a loop header and its predecessor. 
+ static void collectFromBlock(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, const BasicBlock *Block, const BasicBlock *Pred, - SmallPtrSet VisitedBlocks); + SmallPtrSet &VisitedBlocks, + unsigned Depth = 0); + + /// Collect loop guards in \p Guards, starting from PHINode \p + /// Phi, by calling \p collectFromBlock on the incoming blocks of + /// \p Phi and trying to merge the found constraints into a single + /// combined one for \p Phi. + static void + collectFromPHI(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, + const PHINode &Phi, + SmallPtrSet &VisitedBlocks, + unsigned Depth); public: /// Collect rewrite map for loop guards for loop \p L, together with flags diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index d9cab0471ef0f3..6da882a39cf4e1 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -222,6 +222,11 @@ static cl::opt RangeIterThreshold( cl::desc("Threshold for switching to iteratively computing SCEV ranges"), cl::init(32)); +static cl::opt MaxLoopGuardCollectionDepth( + "scalar-evolution-max-loop-guard-collection-depth", cl::Hidden, + cl::desc("Maximum depth for recrusive loop guard collection"), + cl::init(1)); + static cl::opt ClassifyExpressions("scalar-evolution-classify-expressions", cl::Hidden, cl::init(true), @@ -15220,13 +15225,72 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { BasicBlock *Header = L->getHeader(); BasicBlock *Pred = L->getLoopPredecessor(); LoopGuards Guards(SE); - return collectFromBlock(SE, Guards, Header, Pred, {}); + SmallPtrSet VisitedBlocks; + collectFromBlock(SE, Guards, Header, Pred, VisitedBlocks); + return Guards; +} + +void ScalarEvolution::LoopGuards::collectFromPHI( + ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, + const PHINode &Phi, SmallPtrSet &VisitedBlocks, + unsigned Depth) { + if (!SE.isSCEVable(Phi.getType())) + return; + + using MinMaxPattern =
std::pair; + auto GetMinMaxConst = [&](unsigned In) -> MinMaxPattern { + if (!VisitedBlocks.insert(Phi.getIncomingBlock(In)).second) + return {nullptr, scCouldNotCompute}; + LoopGuards G(SE); + collectFromBlock(SE, G, Phi.getParent(), Phi.getIncomingBlock(In), + VisitedBlocks, Depth + 1); + const SCEV *S = G.RewriteMap[SE.getSCEV(Phi.getIncomingValue(In))]; + auto *SM = dyn_cast_if_present(S); + if (!SM) + return {nullptr, scCouldNotCompute}; + if (const SCEVConstant *C0 = dyn_cast(SM->getOperand(0))) + return {C0, SM->getSCEVType()}; + if (const SCEVConstant *C1 = dyn_cast(SM->getOperand(1))) + return {C1, SM->getSCEVType()}; + return {nullptr, scCouldNotCompute}; + }; + auto MergeMinMaxConst = [](MinMaxPattern P1, + MinMaxPattern P2) -> MinMaxPattern { + auto [C1, T1] = P1; + auto [C2, T2] = P2; + if (!C1 || !C2 || T1 != T2) + return {nullptr, scCouldNotCompute}; + switch (T1) { + case scUMaxExpr: + return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1}; + case scSMaxExpr: + return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1}; + case scUMinExpr: + return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1}; + case scSMinExpr: + return {C1->getAPInt().sgt(C2->getAPInt()) ? 
C1 : C2, T1}; + default: + llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs."); + } + }; + auto P = GetMinMaxConst(0); + for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) { + if (!P.first) + break; + P = MergeMinMaxConst(P, GetMinMaxConst(In)); + } + if (P.first) { + const SCEV *LHS = SE.getSCEV(const_cast(&Phi)); + SmallVector Ops({P.first, LHS}); + const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops); + Guards.RewriteMap.insert({LHS, RHS}); + } } -ScalarEvolution::LoopGuards ScalarEvolution::LoopGuards::collectFromBlock( +void ScalarEvolution::LoopGuards::collectFromBlock( ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, const BasicBlock *Block, const BasicBlock *Pred, - SmallPtrSet VisitedBlocks) { + SmallPtrSet &VisitedBlocks, unsigned Depth) { SmallVector ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, @@ -15608,59 +15672,10 @@ ScalarEvolution::LoopGuards ScalarEvolution::LoopGuards::collectFromBlock( // for PHINodes by recursively following all of their incoming // blocks and try to merge the found conditions to build a new one // for the Phi. 
- if (Pair.second->hasNPredecessorsOrMore(2)) { + if (Pair.second->hasNPredecessorsOrMore(2) && + Depth < MaxLoopGuardCollectionDepth) { for (auto &Phi : Pair.second->phis()) { - if (!SE.isSCEVable(Phi.getType())) - continue; - - using MinMaxPattern = std::pair; - auto GetMinMaxConst = [&SE, &VisitedBlocks, &Pair, - &Phi](unsigned int In) -> MinMaxPattern { - LoopGuards G(SE); - if (VisitedBlocks.insert(Phi.getIncomingBlock(In)).second) - collectFromBlock(SE, G, Pair.second, Phi.getIncomingBlock(In), - VisitedBlocks); - const SCEV *S = G.RewriteMap[SE.getSCEV(Phi.getIncomingValue(In))]; - auto *SM = dyn_cast_if_present(S); - if (!SM) - return {nullptr, scCouldNotCompute}; - if (const SCEVConstant *C0 = dyn_cast(SM->getOperand(0))) - return {C0, SM->getSCEVType()}; - if (const SCEVConstant *C1 = dyn_cast(SM->getOperand(1))) - return {C1, SM->getSCEVType()}; - return {nullptr, scCouldNotCompute}; - }; - auto MergeMinMaxConst = [](MinMaxPattern P1, - MinMaxPattern P2) -> MinMaxPattern { - auto [C1, T1] = P1; - auto [C2, T2] = P2; - if (!C1 || !C2 || T1 != T2) - return {nullptr, scCouldNotCompute}; - switch (T1) { - case scUMaxExpr: - return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1}; - case scSMaxExpr: - return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1}; - case scUMinExpr: - return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1}; - case scSMinExpr: - return {C1->getAPInt().sgt(C2->getAPInt()) ? 
C1 : C2, T1}; - default: - llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs."); - } - }; - auto P = GetMinMaxConst(0); - for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) { - if (!P.first) - break; - P = MergeMinMaxConst(P, GetMinMaxConst(In)); - } - if (P.first) { - const SCEV *LHS = SE.getSCEV(const_cast(&Phi)); - SmallVector Ops({P.first, LHS}); - const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops); - Guards.RewriteMap.insert({LHS, RHS}); - } + collectFromPHI(SE, Guards, Phi, VisitedBlocks, Depth); } } @@ -15718,7 +15733,6 @@ ScalarEvolution::LoopGuards ScalarEvolution::LoopGuards::collectFromBlock( Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)}); } } - return Guards; } const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const { diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll new file mode 100644 index 00000000000000..c5b1a3ea2d0e5d --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll @@ -0,0 +1,210 @@ +; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s + +define void @epilogue(i64 %count) { +; CHECK-LABEL: 'epilogue' +; CHECK-NEXT: Determining loop execution counts for: @epilogue +; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 6 +; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 +; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is 
((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +entry: + %cmp = icmp ugt i64 %count, 7 + br i1 %cmp, label %while.body, label %epilogue.preheader + +while.body: + %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] + %sub = add i64 %iv, -8 + %exitcond.not = icmp ugt i64 %sub, 7 + br i1 %exitcond.not, label %while.body, label %while.loopexit + +while.loopexit: + %sub.exit = phi i64 [ %sub, %while.body ] + br label %epilogue.preheader + +epilogue.preheader: + %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] + %epilogue.cmp = icmp eq i64 %count.epilogue, 0 + br i1 %epilogue.cmp, label %exit, label %epilogue + +epilogue: + %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] + %dec = add i64 %iv.epilogue, -1 + %exitcond.epilogue = icmp eq i64 %dec, 0 + br i1 %exitcond.epilogue, label %exit, label %epilogue + +exit: + ret void +} + +define void @epilogue2(i64 %count) { +; CHECK-LABEL: 'epilogue2' +; CHECK-NEXT: Determining loop execution counts for: @epilogue2 +; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 8 +; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 +; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +entry: + %cmp = icmp ugt i64 %count, 9 + br i1 %cmp, label %while.body, label %epilogue.preheader + +while.body: + %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] + %sub = add i64 %iv, -8 + %exitcond.not = icmp ugt i64 %sub, 7 + br i1 %exitcond.not, label %while.body, label %while.loopexit + +while.loopexit: + %sub.exit 
= phi i64 [ %sub, %while.body ] + br label %epilogue.preheader + +epilogue.preheader: + %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] + %epilogue.cmp = icmp eq i64 %count.epilogue, 0 + br i1 %epilogue.cmp, label %exit, label %epilogue + +epilogue: + %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] + %dec = add i64 %iv.epilogue, -1 + %exitcond.epilogue = icmp eq i64 %dec, 0 + br i1 %exitcond.epilogue, label %exit, label %epilogue + +exit: + ret void +} + +define void @slt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'slt' +; CHECK-NEXT: Determining loop execution counts for: @slt +; CHECK-NEXT: Loop %loop: backedge-taken count is (63 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (63 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp slt i16 %a, 8 + br i1 %cmp1, label %preheader, label %exit + +b2: + %cmp2 = icmp slt i16 %b, 8 + br i1 %cmp2, label %preheader, label %exit + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, 1 + %exitcond = icmp slt i16 %iv.next, 64 + br i1 %exitcond, label %loop, label %exit + +exit: + ret void +} + +define void @ult(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'ult' +; CHECK-NEXT: Determining loop execution counts for: @ult +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp ult i16 %a, 8 + br i1 %cmp1, label %exit, label %preheader + +b2: + %cmp2 = icmp ult i16 %b, 8 + br i1 %cmp2, label %exit, label %preheader + 
+preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, -1 + %exitcond = icmp eq i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @sgt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'sgt' +; CHECK-NEXT: Determining loop execution counts for: @sgt +; CHECK-NEXT: Loop %loop: backedge-taken count is %count +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 32767 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %count +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp sgt i16 %a, 8 + br i1 %cmp1, label %preheader, label %exit + +b2: + %cmp2 = icmp sgt i16 %b, 8 + br i1 %cmp2, label %preheader, label %exit + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, -1 + %exitcond = icmp slt i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + + +define void @mixed(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'mixed' +; CHECK-NEXT: Determining loop execution counts for: @mixed +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp slt i16 %a, 8 + br i1 %cmp1, label %preheader, label %exit + +b2: + %cmp2 = icmp ult i16 %b, 8 + br i1 %cmp2, label %preheader, label %exit + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + br label %loop + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, 1 + %exitcond = icmp slt i16 
%iv.next, 64 + br i1 %exitcond, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count.ll b/llvm/test/Analysis/ScalarEvolution/trip-count.ll index 02b451d557513a..8fc5b9b4096127 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count.ll @@ -211,217 +211,3 @@ for.body: exit: ret void } - -define void @epilogue(i64 %count) { -; CHECK-LABEL: 'epilogue' -; CHECK-NEXT: Determining loop execution counts for: @epilogue -; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 6 -; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 -; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 -; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: Trip multiple is 1 -entry: - %cmp = icmp ugt i64 %count, 7 - br i1 %cmp, label %while.body, label %epilogue.preheader - -while.body: - %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] - %sub = add i64 %iv, -8 - %exitcond.not = icmp ugt i64 %sub, 7 - br i1 %exitcond.not, label %while.body, label %while.loopexit - -while.loopexit: - %sub.exit = phi i64 [ %sub, %while.body ] - br label %epilogue.preheader - -epilogue.preheader: - %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] - %epilogue.cmp = icmp eq i64 %count.epilogue, 0 - br i1 %epilogue.cmp, label %exit, label %epilogue - -epilogue: - %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] - %dec = add i64 %iv.epilogue, -1 - %exitcond.epilogue = icmp eq i64 %dec, 0 - br i1 %exitcond.epilogue, label %exit, label %epilogue - -exit: - ret void - -} - -define 
void @epilogue2(i64 %count) { -; CHECK-LABEL: 'epilogue2' -; CHECK-NEXT: Determining loop execution counts for: @epilogue2 -; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 8 -; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 -; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 -; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: Trip multiple is 1 -entry: - %cmp = icmp ugt i64 %count, 9 - br i1 %cmp, label %while.body, label %epilogue.preheader - -while.body: - %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] - %sub = add i64 %iv, -8 - %exitcond.not = icmp ugt i64 %sub, 7 - br i1 %exitcond.not, label %while.body, label %while.loopexit - -while.loopexit: - %sub.exit = phi i64 [ %sub, %while.body ] - br label %epilogue.preheader - -epilogue.preheader: - %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] - %epilogue.cmp = icmp eq i64 %count.epilogue, 0 - br i1 %epilogue.cmp, label %exit, label %epilogue - -epilogue: - %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] - %dec = add i64 %iv.epilogue, -1 - %exitcond.epilogue = icmp eq i64 %dec, 0 - br i1 %exitcond.epilogue, label %exit, label %epilogue - -exit: - ret void - -} - -define void @slt(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'slt' -; CHECK-NEXT: Determining loop execution counts for: @slt -; CHECK-NEXT: Loop %loop: backedge-taken count is (63 + (-1 * %count)) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (63 + (-1 * %count)) -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -entry: 
- br i1 %c, label %b1, label %b2 - -b1: - %cmp1 = icmp slt i16 %a, 8 - br i1 %cmp1, label %preheader, label %exit - -b2: - %cmp2 = icmp slt i16 %b, 8 - br i1 %cmp2, label %preheader, label %exit - -preheader: - %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop - -loop: - %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] - %iv.next = add i16 %iv, 1 - %exitcond = icmp slt i16 %iv.next, 64 - br i1 %exitcond, label %loop, label %exit - -exit: - ret void - -} - -define void @ult(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'ult' -; CHECK-NEXT: Determining loop execution counts for: @ult -; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -2 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -entry: - br i1 %c, label %b1, label %b2 - -b1: - %cmp1 = icmp ult i16 %a, 8 - br i1 %cmp1, label %exit, label %preheader - -b2: - %cmp2 = icmp ult i16 %b, 8 - br i1 %cmp2, label %exit, label %preheader - -preheader: - %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop - -loop: - %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] - %iv.next = add i16 %iv, -1 - %exitcond = icmp eq i16 %iv.next, 0 - br i1 %exitcond, label %exit, label %loop - -exit: - ret void - -} - -define void @sgt(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'sgt' -; CHECK-NEXT: Determining loop execution counts for: @sgt -; CHECK-NEXT: Loop %loop: backedge-taken count is %count -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 32767 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %count -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -entry: - br i1 %c, label %b1, label %b2 - -b1: - %cmp1 = icmp sgt i16 %a, 8 - br i1 %cmp1, label %preheader, label %exit - -b2: - %cmp2 = icmp sgt i16 %b, 8 - br i1 %cmp2, label %preheader, label %exit - -preheader: - %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop - 
-loop: - %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] - %iv.next = add i16 %iv, -1 - %exitcond = icmp slt i16 %iv.next, 0 - br i1 %exitcond, label %exit, label %loop - -exit: - ret void -} - - -define void @mixed(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'mixed' -; CHECK-NEXT: Determining loop execution counts for: @mixed -; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) -; CHECK-NEXT: Loop %loop: Trip multiple is 1 -entry: - br i1 %c, label %b1, label %b2 - -b1: - %cmp1 = icmp slt i16 %a, 8 - br i1 %cmp1, label %preheader, label %exit - -b2: - %cmp2 = icmp ult i16 %b, 8 - br i1 %cmp2, label %preheader, label %exit - -preheader: - %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop - -loop: - %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] - %iv.next = add i16 %iv, 1 - %exitcond = icmp slt i16 %iv.next, 64 - br i1 %exitcond, label %loop, label %exit - -exit: - ret void - -} From bda368db295f12bdcddaa74e54093bb05bc87c38 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Fri, 8 Nov 2024 13:01:52 +0000 Subject: [PATCH 4/9] Fix formatting --- llvm/lib/Analysis/ScalarEvolution.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 6da882a39cf4e1..479c2317f440ee 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -224,8 +224,7 @@ static cl::opt RangeIterThreshold( static cl::opt MaxLoopGuardCollectionDepth( "scalar-evolution-max-loop-guard-collection-depth", cl::Hidden, - cl::desc("Maximum depth for recrusive loop guard collection"), - cl::init(1)); + cl::desc("Maximum depth for recrusive loop guard collection"), cl::init(1)); static cl::opt 
ClassifyExpressions("scalar-evolution-classify-expressions", From e4d5a7662efb0af8c49bffb53a3ed6fd24848a27 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Mon, 11 Nov 2024 13:12:48 +0000 Subject: [PATCH 5/9] Update tests --- ...t-guard-info-with-multiple-predecessors.ll | 287 +++++++++++------- 1 file changed, 178 insertions(+), 109 deletions(-) diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll index c5b1a3ea2d0e5d..71d66ef04ade12 100644 --- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-with-multiple-predecessors.ll @@ -1,138 +1,126 @@ ; RUN: opt < %s -disable-output "-passes=print" -scalar-evolution-max-iterations=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s -define void @epilogue(i64 %count) { -; CHECK-LABEL: 'epilogue' -; CHECK-NEXT: Determining loop execution counts for: @epilogue -; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 6 -; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 -; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 -; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +define void @slt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'slt' +; CHECK-NEXT: Determining loop execution counts for: @slt +; CHECK-NEXT: Loop %loop: backedge-taken count is (19 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 18 +; CHECK-NEXT: Loop 
%loop: symbolic max backedge-taken count is (19 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 entry: - %cmp = icmp ugt i64 %count, 7 - br i1 %cmp, label %while.body, label %epilogue.preheader + br i1 %c, label %b1, label %b2 -while.body: - %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] - %sub = add i64 %iv, -8 - %exitcond.not = icmp ugt i64 %sub, 7 - br i1 %exitcond.not, label %while.body, label %while.loopexit +b1: + %cmp1 = icmp slt i16 %a, 1 + br i1 %cmp1, label %exit, label %preheader -while.loopexit: - %sub.exit = phi i64 [ %sub, %while.body ] - br label %epilogue.preheader +b2: + %cmp2 = icmp slt i16 %b, 4 + br i1 %cmp2, label %exit, label %preheader -epilogue.preheader: - %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] - %epilogue.cmp = icmp eq i64 %count.epilogue, 0 - br i1 %epilogue.cmp, label %exit, label %epilogue +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + %cmp3 = icmp sle i16 %count, 19 + br i1 %cmp3, label %loop, label %exit -epilogue: - %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] - %dec = add i64 %iv.epilogue, -1 - %exitcond.epilogue = icmp eq i64 %dec, 0 - br i1 %exitcond.epilogue, label %exit, label %epilogue +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, 1 + %exitcond = icmp eq i16 %iv.next, 20 + br i1 %exitcond, label %exit, label %loop exit: ret void } -define void @epilogue2(i64 %count) { -; CHECK-LABEL: 'epilogue2' -; CHECK-NEXT: Determining loop execution counts for: @epilogue2 -; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 8 -; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) -; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 -; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: constant max 
backedge-taken count is i64 2305843009213693951 -; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) -; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +define void @ult(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'ult' +; CHECK-NEXT: Determining loop execution counts for: @ult +; CHECK-NEXT: Loop %loop: backedge-taken count is (21 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 19 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (21 + (-1 * %count)) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 entry: - %cmp = icmp ugt i64 %count, 9 - br i1 %cmp, label %while.body, label %epilogue.preheader + br i1 %c, label %b1, label %b2 -while.body: - %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] - %sub = add i64 %iv, -8 - %exitcond.not = icmp ugt i64 %sub, 7 - br i1 %exitcond.not, label %while.body, label %while.loopexit +b1: + %cmp1 = icmp ult i16 %a, 2 + br i1 %cmp1, label %exit, label %preheader -while.loopexit: - %sub.exit = phi i64 [ %sub, %while.body ] - br label %epilogue.preheader +b2: + %cmp2 = icmp ult i16 %b, 5 + br i1 %cmp2, label %exit, label %preheader -epilogue.preheader: - %count.epilogue = phi i64 [ %count, %entry ], [ %sub.exit, %while.loopexit ] - %epilogue.cmp = icmp eq i64 %count.epilogue, 0 - br i1 %epilogue.cmp, label %exit, label %epilogue +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + %cmp3 = icmp ule i16 %count, 20 + br i1 %cmp3, label %loop, label %exit -epilogue: - %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] - %dec = add i64 %iv.epilogue, -1 - %exitcond.epilogue = icmp eq i64 %dec, 0 - br i1 %exitcond.epilogue, label %exit, label %epilogue +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, 1 + %exitcond = icmp eq i16 %iv.next, 22 + br i1 %exitcond, label %exit, label %loop exit: ret void } -define void @slt(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'slt' -; 
CHECK-NEXT: Determining loop execution counts for: @slt -; CHECK-NEXT: Loop %loop: backedge-taken count is (63 + (-1 * %count)) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (63 + (-1 * %count)) +define void @sgt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'sgt' +; CHECK-NEXT: Determining loop execution counts for: @sgt +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 9 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) ; CHECK-NEXT: Loop %loop: Trip multiple is 1 entry: br i1 %c, label %b1, label %b2 b1: - %cmp1 = icmp slt i16 %a, 8 - br i1 %cmp1, label %preheader, label %exit + %cmp1 = icmp sgt i16 %a, 10 + br i1 %cmp1, label %exit, label %preheader b2: - %cmp2 = icmp slt i16 %b, 8 - br i1 %cmp2, label %preheader, label %exit + %cmp2 = icmp sgt i16 %b, 8 + br i1 %cmp2, label %exit, label %preheader preheader: %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop + %cmp3 = icmp sge i16 %count, 1 + br i1 %cmp3, label %loop, label %exit loop: %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] - %iv.next = add i16 %iv, 1 - %exitcond = icmp slt i16 %iv.next, 64 - br i1 %exitcond, label %loop, label %exit + %iv.next = add i16 %iv, -1 + %exitcond = icmp eq i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop exit: ret void } -define void @ult(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'ult' -; CHECK-NEXT: Determining loop execution counts for: @ult -; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -2 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) +define void @ugt(i16 %a, i16 %b, i1 %c) { +; CHECK-LABEL: 'ugt' +; CHECK-NEXT: Determining loop execution counts for: @ugt +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; 
CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 10 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) ; CHECK-NEXT: Loop %loop: Trip multiple is 1 entry: br i1 %c, label %b1, label %b2 b1: - %cmp1 = icmp ult i16 %a, 8 + %cmp1 = icmp ugt i16 %a, 11 br i1 %cmp1, label %exit, label %preheader b2: - %cmp2 = icmp ult i16 %b, 8 + %cmp2 = icmp ugt i16 %b, 7 br i1 %cmp2, label %exit, label %preheader preheader: %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop + %cmp3 = icmp ne i16 %count, 0 + br i1 %cmp3, label %loop, label %exit loop: %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] @@ -144,66 +132,147 @@ exit: ret void } -define void @sgt(i16 %a, i16 %b, i1 %c) { -; CHECK-LABEL: 'sgt' -; CHECK-NEXT: Determining loop execution counts for: @sgt -; CHECK-NEXT: Loop %loop: backedge-taken count is %count -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 32767 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %count +define void @three_incoming(i16 %a, i16 %b, i1 %c, i1 %d) { +; CHECK-LABEL: 'three_incoming' +; CHECK-NEXT: Determining loop execution counts for: @three_incoming +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 11 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) ; CHECK-NEXT: Loop %loop: Trip multiple is 1 entry: - br i1 %c, label %b1, label %b2 + br i1 %c, label %b1, label %entry2 + +entry2: + br i1 %d, label %b2, label %b3 b1: - %cmp1 = icmp sgt i16 %a, 8 - br i1 %cmp1, label %preheader, label %exit + %cmp1 = icmp ugt i16 %a, 10 + br i1 %cmp1, label %exit, label %preheader b2: - %cmp2 = icmp sgt i16 %b, 8 - br i1 %cmp2, label %preheader, label %exit + %cmp2 = icmp ugt i16 %b, 8 + br i1 %cmp2, label %exit, label %preheader + +b3: + %cmp3 = icmp ugt i16 %b, 12 + br i1 %cmp3, label %exit, label %preheader preheader: - %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] 
- br label %loop + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ], [ %b, %b3 ] + %cmp4 = icmp ne i16 %count, 0 + br i1 %cmp4, label %loop, label %exit loop: %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] %iv.next = add i16 %iv, -1 - %exitcond = icmp slt i16 %iv.next, 0 + %exitcond = icmp eq i16 %iv.next, 0 br i1 %exitcond, label %exit, label %loop exit: ret void } - define void @mixed(i16 %a, i16 %b, i1 %c) { ; CHECK-LABEL: 'mixed' ; CHECK-NEXT: Determining loop execution counts for: @mixed -; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) -; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -32704 -; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (-1 * %count) + (64 smax (1 + %count))) +; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) ; CHECK-NEXT: Loop %loop: Trip multiple is 1 entry: br i1 %c, label %b1, label %b2 b1: - %cmp1 = icmp slt i16 %a, 8 - br i1 %cmp1, label %preheader, label %exit + %cmp1 = icmp ugt i16 %a, 10 + br i1 %cmp1, label %exit, label %preheader b2: - %cmp2 = icmp ult i16 %b, 8 - br i1 %cmp2, label %preheader, label %exit + %cmp2 = icmp sgt i16 %b, 8 + br i1 %cmp2, label %exit, label %preheader preheader: %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] - br label %loop + %cmp3 = icmp ne i16 %count, 0 + br i1 %cmp3, label %loop, label %exit loop: %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] - %iv.next = add i16 %iv, 1 - %exitcond = icmp slt i16 %iv.next, 64 - br i1 %exitcond, label %loop, label %exit + %iv.next = add i16 %iv, -1 + %exitcond = icmp eq i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @one_constant(i16 %a, i16 %b, i1 %c, i16 %d) { +; CHECK-LABEL: 'one_constant' +; CHECK-NEXT: Determining loop execution counts for: @one_constant 
+; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i16 -2 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + %count) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 +entry: + br i1 %c, label %b1, label %b2 + +b1: + %cmp1 = icmp ugt i16 %a, 10 + br i1 %cmp1, label %exit, label %preheader + +b2: + %cmp2 = icmp ugt i16 %b, %d + br i1 %cmp2, label %exit, label %preheader + +preheader: + %count = phi i16 [ %a, %b1 ], [ %b, %b2 ] + %cmp3 = icmp ne i16 %count, 0 + br i1 %cmp3, label %loop, label %exit + +loop: + %iv = phi i16 [ %iv.next, %loop ], [ %count, %preheader ] + %iv.next = add i16 %iv, -1 + %exitcond = icmp eq i16 %iv.next, 0 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @epilogue(i64 %count) { +; CHECK-LABEL: 'epilogue' +; CHECK-NEXT: Determining loop execution counts for: @epilogue +; CHECK-NEXT: Loop %epilogue: backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: constant max backedge-taken count is i64 6 +; CHECK-NEXT: Loop %epilogue: symbolic max backedge-taken count is (-1 + %count.epilogue) +; CHECK-NEXT: Loop %epilogue: Trip multiple is 1 +; CHECK-NEXT: Loop %while.body: backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: constant max backedge-taken count is i64 2305843009213693951 +; CHECK-NEXT: Loop %while.body: symbolic max backedge-taken count is ((-8 + %count) /u 8) +; CHECK-NEXT: Loop %while.body: Trip multiple is 1 +entry: + %cmp = icmp ugt i64 %count, 7 + br i1 %cmp, label %while.body, label %epilogue.preheader + +while.body: + %iv = phi i64 [ %sub, %while.body ], [ %count, %entry ] + %sub = add i64 %iv, -8 + %exitcond.not = icmp ugt i64 %sub, 7 + br i1 %exitcond.not, label %while.body, label %while.loopexit + +while.loopexit: + %sub.exit = phi i64 [ %sub, %while.body ] + br label %epilogue.preheader + +epilogue.preheader: + %count.epilogue = phi i64 [ %count, %entry 
], [ %sub.exit, %while.loopexit ] + %epilogue.cmp = icmp eq i64 %count.epilogue, 0 + br i1 %epilogue.cmp, label %exit, label %epilogue + +epilogue: + %iv.epilogue = phi i64 [ %dec, %epilogue ], [ %count.epilogue, %epilogue.preheader ] + %dec = add i64 %iv.epilogue, -1 + %exitcond.epilogue = icmp eq i64 %dec, 0 + br i1 %exitcond.epilogue, label %exit, label %epilogue exit: ret void From 59da748a8780a0ad0fe4fd2987e628bcf638c15a Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Mon, 11 Nov 2024 19:15:29 +0000 Subject: [PATCH 6/9] Stop collecting guards after 2 predecessors in the recursive case --- llvm/lib/Analysis/ScalarEvolution.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 479c2317f440ee..daf7ad1e2d3464 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15665,6 +15665,10 @@ void ScalarEvolution::LoopGuards::collectFromBlock( Terms.emplace_back(LoopEntryPredicate->getCondition(), LoopEntryPredicate->getSuccessor(0) == Pair.second); + + // If we are recursively collecting guards stop after 2 predecessors. 
+ if (Depth > 0 && Terms.size() == 2) + break; } // Finally, if we stopped climbing the predecessor chain because // there wasn't a unique one to continue, try to collect conditions From 772fc502f55fab391e6b0c213bb152d448dc94c9 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Tue, 12 Nov 2024 16:04:00 +0000 Subject: [PATCH 7/9] Collect loop guards only once per predecessor --- llvm/include/llvm/Analysis/ScalarEvolution.h | 12 ++++---- llvm/lib/Analysis/ScalarEvolution.cpp | 30 +++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 950ffd8649db1a..c254443e6ed018 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1322,18 +1322,18 @@ class ScalarEvolution { static void collectFromBlock(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, const BasicBlock *Block, const BasicBlock *Pred, - SmallPtrSet &VisitedBlocks, + SmallPtrSetImpl &VisitedBlocks, unsigned Depth = 0); /// Collect loop guards in \p Guards, starting from PHINode \p /// Phi, by calling \p collectFromBlock on the incoming blocks of /// \Phi and trying to merge the found constraints into a single /// combined on for \p Phi. 
- static void - collectFromPHI(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, - const PHINode &Phi, - SmallPtrSet &VisitedBlocks, - unsigned Depth); + static void collectFromPHI( + ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, + const PHINode &Phi, SmallPtrSetImpl &VisitedBlocks, + SmallDenseMap &IncomingGuards, + unsigned Depth); public: /// Collect rewrite map for loop guards for loop \p L, together with flags diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index daf7ad1e2d3464..01ac37dda988c3 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15231,20 +15231,28 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) { void ScalarEvolution::LoopGuards::collectFromPHI( ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, - const PHINode &Phi, SmallPtrSet &VisitedBlocks, + const PHINode &Phi, SmallPtrSetImpl &VisitedBlocks, + SmallDenseMap &IncomingGuards, unsigned Depth) { if (!SE.isSCEVable(Phi.getType())) return; using MinMaxPattern = std::pair; auto GetMinMaxConst = [&](unsigned In) -> MinMaxPattern { - if (!VisitedBlocks.insert(Phi.getIncomingBlock(In)).second) + const BasicBlock *InBlock = Phi.getIncomingBlock(In); + if (!VisitedBlocks.insert(InBlock).second) return {nullptr, scCouldNotCompute}; - LoopGuards G(SE); - collectFromBlock(SE, G, Phi.getParent(), Phi.getIncomingBlock(In), - VisitedBlocks, Depth + 1); - const SCEV *S = G.RewriteMap[SE.getSCEV(Phi.getIncomingValue(In))]; - auto *SM = dyn_cast_if_present(S); + if (!IncomingGuards.contains(InBlock)) { + LoopGuards G(SE); + collectFromBlock(SE, G, Phi.getParent(), InBlock, VisitedBlocks, + Depth + 1); + IncomingGuards.try_emplace(InBlock, std::move(G)); + } + const LoopGuards &G = IncomingGuards.at(InBlock); + auto S = G.RewriteMap.find(SE.getSCEV(Phi.getIncomingValue(In))); + if (S == G.RewriteMap.end()) + return {nullptr, scCouldNotCompute}; + auto *SM = 
dyn_cast_if_present(S->second); if (!SM) return {nullptr, scCouldNotCompute}; if (const SCEVConstant *C0 = dyn_cast(SM->getOperand(0))) @@ -15289,7 +15297,7 @@ void ScalarEvolution::LoopGuards::collectFromPHI( void ScalarEvolution::LoopGuards::collectFromBlock( ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards, const BasicBlock *Block, const BasicBlock *Pred, - SmallPtrSet &VisitedBlocks, unsigned Depth) { + SmallPtrSetImpl &VisitedBlocks, unsigned Depth) { SmallVector ExprsToRewrite; auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS, const SCEV *RHS, @@ -15666,7 +15674,8 @@ void ScalarEvolution::LoopGuards::collectFromBlock( Terms.emplace_back(LoopEntryPredicate->getCondition(), LoopEntryPredicate->getSuccessor(0) == Pair.second); - // If we are recursively collecting guards stop after 2 predecessors. + // If we are recursively collecting guards stop after 2 + // predecessors to limit compile-time impact for now. if (Depth > 0 && Terms.size() == 2) break; } @@ -15677,8 +15686,9 @@ void ScalarEvolution::LoopGuards::collectFromBlock( // for the Phi. if (Pair.second->hasNPredecessorsOrMore(2) && Depth < MaxLoopGuardCollectionDepth) { + SmallDenseMap IncomingGuards; for (auto &Phi : Pair.second->phis()) { - collectFromPHI(SE, Guards, Phi, VisitedBlocks, Depth); + collectFromPHI(SE, Guards, Phi, VisitedBlocks, IncomingGuards, Depth); } } From 33209565fb1e91cf7f250a85dfa7e7694f628e1a Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Tue, 12 Nov 2024 22:40:33 +0000 Subject: [PATCH 8/9] fixup! 
Collect loop guards only once per predecessor --- llvm/lib/Analysis/ScalarEvolution.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 01ac37dda988c3..0c824f687554e5 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15238,8 +15238,8 @@ void ScalarEvolution::LoopGuards::collectFromPHI( return; using MinMaxPattern = std::pair<const SCEVConstant *, SCEVTypes>; - auto GetMinMaxConst = [&](unsigned In) -> MinMaxPattern { - const BasicBlock *InBlock = Phi.getIncomingBlock(In); + auto GetMinMaxConst = [&](unsigned IncomingIdx) -> MinMaxPattern { + const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx); if (!VisitedBlocks.insert(InBlock).second) return {nullptr, scCouldNotCompute}; if (!IncomingGuards.contains(InBlock)) { @@ -15249,7 +15249,7 @@ void ScalarEvolution::LoopGuards::collectFromPHI( IncomingGuards.try_emplace(InBlock, std::move(G)); } const LoopGuards &G = IncomingGuards.at(InBlock); - auto S = G.RewriteMap.find(SE.getSCEV(Phi.getIncomingValue(In))); + auto S = G.RewriteMap.find(SE.getSCEV(Phi.getIncomingValue(IncomingIdx))); if (S == G.RewriteMap.end()) return {nullptr, scCouldNotCompute}; auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S->second); @@ -15687,9 +15687,8 @@ void ScalarEvolution::LoopGuards::collectFromBlock( if (Pair.second->hasNPredecessorsOrMore(2) && Depth < MaxLoopGuardCollectionDepth) { SmallDenseMap<const BasicBlock *, LoopGuards> IncomingGuards; - for (auto &Phi : Pair.second->phis()) { + for (auto &Phi : Pair.second->phis()) collectFromPHI(SE, Guards, Phi, VisitedBlocks, IncomingGuards, Depth); - } } // Now apply the information from the collected conditions to From 36f56265a6b903269f68c8c9cf4186ad114d5150 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Thu, 14 Nov 2024 15:46:04 +0000 Subject: [PATCH 9/9] fixup! fixup! 
Collect loop guards only once per predecessor --- llvm/lib/Analysis/ScalarEvolution.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 0c824f687554e5..8ab40dbbab7751 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15242,15 +15242,13 @@ void ScalarEvolution::LoopGuards::collectFromPHI( const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx); if (!VisitedBlocks.insert(InBlock).second) return {nullptr, scCouldNotCompute}; - if (!IncomingGuards.contains(InBlock)) { - LoopGuards G(SE); - collectFromBlock(SE, G, Phi.getParent(), InBlock, VisitedBlocks, + auto [G, Inserted] = IncomingGuards.try_emplace(InBlock, LoopGuards(SE)); + if (Inserted) + collectFromBlock(SE, G->second, Phi.getParent(), InBlock, VisitedBlocks, Depth + 1); - IncomingGuards.try_emplace(InBlock, std::move(G)); - } - const LoopGuards &G = IncomingGuards.at(InBlock); - auto S = G.RewriteMap.find(SE.getSCEV(Phi.getIncomingValue(IncomingIdx))); - if (S == G.RewriteMap.end()) + auto S = G->second.RewriteMap.find( + SE.getSCEV(Phi.getIncomingValue(IncomingIdx))); + if (S == G->second.RewriteMap.end()) return {nullptr, scCouldNotCompute}; auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S->second); if (!SM)