Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SCEV] Collect and merge loop guards through PHI nodes with multiple incoming values #113915

Merged
merged 9 commits into from
Nov 15, 2024
19 changes: 19 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,25 @@ class ScalarEvolution {

LoopGuards(ScalarEvolution &SE) : SE(SE) {}

/// Recursively collect loop guards in \p Guards, starting from
/// block \p Block with predecessor \p Pred. The intended starting point
/// is to collect from a loop header and its predecessor.
static void
collectFromBlock(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
juliannagele marked this conversation as resolved.
Show resolved Hide resolved
const BasicBlock *Block, const BasicBlock *Pred,
SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
unsigned Depth = 0);

/// Collect loop guards in \p Guards, starting from PHINode \p
/// Phi, by calling \p collectFromBlock on the incoming blocks of
/// \Phi and trying to merge the found constraints into a single
/// combined on for \p Phi.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on -> one

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fixed in feb9b37 (@juliannagele is OOO for few days, so I took care of it)

static void collectFromPHI(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const PHINode &Phi, SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
SmallDenseMap<const BasicBlock *, LoopGuards> &IncomingGuards,
unsigned Depth);

public:
/// Collect rewrite map for loop guards for loop \p L, together with flags
/// indicating if NUW and NSW can be preserved during rewriting.
Expand Down
113 changes: 103 additions & 10 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ static cl::opt<unsigned> RangeIterThreshold(
cl::desc("Threshold for switching to iteratively computing SCEV ranges"),
cl::init(32));

static cl::opt<unsigned> MaxLoopGuardCollectionDepth(
"scalar-evolution-max-loop-guard-collection-depth", cl::Hidden,
cl::desc("Maximum depth for recrusive loop guard collection"), cl::init(1));

static cl::opt<bool>
ClassifyExpressions("scalar-evolution-classify-expressions",
cl::Hidden, cl::init(true),
Expand Down Expand Up @@ -10648,7 +10652,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
if (const Loop *L = LI.getLoopFor(BB))
return {L->getLoopPredecessor(), L->getHeader()};

return {nullptr, nullptr};
return {nullptr, BB};
}

/// SCEV structural equivalence is usually sufficient for testing whether two
Expand Down Expand Up @@ -15217,7 +15221,83 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,

ScalarEvolution::LoopGuards
ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
BasicBlock *Header = L->getHeader();
BasicBlock *Pred = L->getLoopPredecessor();
LoopGuards Guards(SE);
SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
collectFromBlock(SE, Guards, Header, Pred, VisitedBlocks);
return Guards;
}

void ScalarEvolution::LoopGuards::collectFromPHI(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const PHINode &Phi, SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
SmallDenseMap<const BasicBlock *, LoopGuards> &IncomingGuards,
unsigned Depth) {
if (!SE.isSCEVable(Phi.getType()))
return;

using MinMaxPattern = std::pair<const SCEVConstant *, SCEVTypes>;
auto GetMinMaxConst = [&](unsigned IncomingIdx) -> MinMaxPattern {
const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx);
if (!VisitedBlocks.insert(InBlock).second)
return {nullptr, scCouldNotCompute};
if (!IncomingGuards.contains(InBlock)) {
LoopGuards G(SE);
collectFromBlock(SE, G, Phi.getParent(), InBlock, VisitedBlocks,
Depth + 1);
IncomingGuards.try_emplace(InBlock, std::move(G));
}
const LoopGuards &G = IncomingGuards.at(InBlock);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Could try to avoid the repeated lookup by using IncomingGuards.insert()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

auto S = G.RewriteMap.find(SE.getSCEV(Phi.getIncomingValue(IncomingIdx)));
if (S == G.RewriteMap.end())
return {nullptr, scCouldNotCompute};
auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S->second);
if (!SM)
return {nullptr, scCouldNotCompute};
if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(SM->getOperand(0)))
return {C0, SM->getSCEVType()};
if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(SM->getOperand(1)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If SM has a SCEVConstant operand, it will always be operand 0, so the second check is redundant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fixed in feb9b37 (@juliannagele is OOO for few days, so I took care of it)

return {C1, SM->getSCEVType()};
return {nullptr, scCouldNotCompute};
};
auto MergeMinMaxConst = [](MinMaxPattern P1,
MinMaxPattern P2) -> MinMaxPattern {
auto [C1, T1] = P1;
auto [C2, T2] = P2;
if (!C1 || !C2 || T1 != T2)
return {nullptr, scCouldNotCompute};
switch (T1) {
case scUMaxExpr:
return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1};
case scSMaxExpr:
return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1};
case scUMinExpr:
return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1};
case scSMinExpr:
return {C1->getAPInt().sgt(C2->getAPInt()) ? C1 : C2, T1};
default:
llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs.");
}
};
auto P = GetMinMaxConst(0);
for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) {
if (!P.first)
break;
P = MergeMinMaxConst(P, GetMinMaxConst(In));
}
if (P.first) {
const SCEV *LHS = SE.getSCEV(const_cast<PHINode *>(&Phi));
SmallVector<const SCEV *, 2> Ops({P.first, LHS});
const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops);
Guards.RewriteMap.insert({LHS, RHS});
}
}

void ScalarEvolution::LoopGuards::collectFromBlock(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const BasicBlock *Block, const BasicBlock *Pred,
SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks, unsigned Depth) {
SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS,
Expand Down Expand Up @@ -15556,14 +15636,13 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
}
};

BasicBlock *Header = L->getHeader();
SmallVector<PointerIntPair<Value *, 1, bool>> Terms;
// First, collect information from assumptions dominating the loop.
for (auto &AssumeVH : SE.AC.assumptions()) {
if (!AssumeVH)
continue;
auto *AssumeI = cast<CallInst>(AssumeVH);
if (!SE.DT.dominates(AssumeI, Header))
if (!SE.DT.dominates(AssumeI, Block))
continue;
Terms.emplace_back(AssumeI->getOperand(0), true);
}
Expand All @@ -15574,27 +15653,42 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
if (GuardDecl)
for (const auto *GU : GuardDecl->users())
if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
if (Guard->getFunction() == Header->getParent() &&
SE.DT.dominates(Guard, Header))
if (Guard->getFunction() == Block->getParent() &&
SE.DT.dominates(Guard, Block))
Terms.emplace_back(Guard->getArgOperand(0), true);

// Third, collect conditions from dominating branches. Starting at the loop
// predecessor, climb up the predecessor chain, as long as there are
// predecessors that can be found that have unique successors leading to the
// original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), Header);
Pair.first;
std::pair<const BasicBlock *, const BasicBlock *> Pair(Pred, Block);
for (; Pair.first;
Pair = SE.getPredecessorWithUniqueSuccessorForBB(Pair.first)) {

VisitedBlocks.insert(Pair.second);
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
continue;

Terms.emplace_back(LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) == Pair.second);

// If we are recursively collecting guards stop after 2
// predecessors to limit compile-time impact for now.
if (Depth > 0 && Terms.size() == 2)
break;
}
// Finally, if we stopped climbing the predecessor chain because
// there wasn't a unique one to continue, try to collect conditions
// for PHINodes by recursively following all of their incoming
// blocks and try to merge the found conditions to build a new one
// for the Phi.
if (Pair.second->hasNPredecessorsOrMore(2) &&
Depth < MaxLoopGuardCollectionDepth) {
SmallDenseMap<const BasicBlock *, LoopGuards> IncomingGuards;
for (auto &Phi : Pair.second->phis())
collectFromPHI(SE, Guards, Phi, VisitedBlocks, IncomingGuards, Depth);
}

// Now apply the information from the collected conditions to
Expand Down Expand Up @@ -15651,7 +15745,6 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)});
}
}
return Guards;
}

const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
Expand Down
Loading
Loading