Skip to content

Commit

Permalink
[SCEV] Collect and merge loop guards through PHI nodes with multiple …
Browse files Browse the repository at this point in the history
…incoming values (llvm#113915)

This patch aims to strengthen collection of loop guards by processing
PHI nodes with multiple incoming values as follows: collect guards for
all incoming values/blocks and try to merge them into a single one for
the PHI node.

The goal is to determine tighter bounds on the trip counts of scalar
tail loops after vectorization, helping to avoid unnecessary transforms.
In particular we'd like to avoid vectorizing scalar tails of
hand-vectorized loops, for example in
[Transforms/PhaseOrdering/X86/pr38280.ll](https://github.com/llvm/llvm-project/blob/231e03ba7e82896847dbc27d457dbb208f04699c/llvm/test/Transforms/PhaseOrdering/X86/pr38280.ll),
discovered via llvm#108190

Compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=a55248789ed3f653740e0723d016203b9d585f26&to=500e4c46e79f60b93b11a752698c520e345948e3&stat=instructions:u

PR: llvm#113915
  • Loading branch information
juliannagele authored and fhahn committed Nov 15, 2024
1 parent 1bd1ae9 commit e403d25
Show file tree
Hide file tree
Showing 4 changed files with 400 additions and 11 deletions.
19 changes: 19 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -1316,6 +1316,25 @@ class ScalarEvolution {

LoopGuards(ScalarEvolution &SE) : SE(SE) {}

/// Recursively collect loop guards in \p Guards, starting from
/// block \p Block with predecessor \p Pred. The intended starting point
/// is to collect from a loop header and its predecessor.
static void
collectFromBlock(ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const BasicBlock *Block, const BasicBlock *Pred,
SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
unsigned Depth = 0);

/// Collect loop guards in \p Guards, starting from PHINode \p
/// Phi, by calling \p collectFromBlock on the incoming blocks of
/// \Phi and trying to merge the found constraints into a single
/// combined on for \p Phi.
static void collectFromPHI(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const PHINode &Phi, SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
SmallDenseMap<const BasicBlock *, LoopGuards> &IncomingGuards,
unsigned Depth);

public:
/// Collect rewrite map for loop guards for loop \p L, together with flags
/// indicating if NUW and NSW can be preserved during rewriting.
Expand Down
111 changes: 101 additions & 10 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ static cl::opt<unsigned> RangeIterThreshold(
cl::desc("Threshold for switching to iteratively computing SCEV ranges"),
cl::init(32));

static cl::opt<unsigned> MaxLoopGuardCollectionDepth(
"scalar-evolution-max-loop-guard-collection-depth", cl::Hidden,
cl::desc("Maximum depth for recrusive loop guard collection"), cl::init(1));

static cl::opt<bool>
ClassifyExpressions("scalar-evolution-classify-expressions",
cl::Hidden, cl::init(true),
Expand Down Expand Up @@ -10648,7 +10652,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB)
if (const Loop *L = LI.getLoopFor(BB))
return {L->getLoopPredecessor(), L->getHeader()};

return {nullptr, nullptr};
return {nullptr, BB};
}

/// SCEV structural equivalence is usually sufficient for testing whether two
Expand Down Expand Up @@ -15217,7 +15221,81 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS,

ScalarEvolution::LoopGuards
ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
BasicBlock *Header = L->getHeader();
BasicBlock *Pred = L->getLoopPredecessor();
LoopGuards Guards(SE);
SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
collectFromBlock(SE, Guards, Header, Pred, VisitedBlocks);
return Guards;
}

void ScalarEvolution::LoopGuards::collectFromPHI(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const PHINode &Phi, SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks,
SmallDenseMap<const BasicBlock *, LoopGuards> &IncomingGuards,
unsigned Depth) {
if (!SE.isSCEVable(Phi.getType()))
return;

using MinMaxPattern = std::pair<const SCEVConstant *, SCEVTypes>;
auto GetMinMaxConst = [&](unsigned IncomingIdx) -> MinMaxPattern {
const BasicBlock *InBlock = Phi.getIncomingBlock(IncomingIdx);
if (!VisitedBlocks.insert(InBlock).second)
return {nullptr, scCouldNotCompute};
auto [G, Inserted] = IncomingGuards.try_emplace(InBlock, LoopGuards(SE));
if (Inserted)
collectFromBlock(SE, G->second, Phi.getParent(), InBlock, VisitedBlocks,
Depth + 1);
auto S = G->second.RewriteMap.find(
SE.getSCEV(Phi.getIncomingValue(IncomingIdx)));
if (S == G->second.RewriteMap.end())
return {nullptr, scCouldNotCompute};
auto *SM = dyn_cast_if_present<SCEVMinMaxExpr>(S->second);
if (!SM)
return {nullptr, scCouldNotCompute};
if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(SM->getOperand(0)))
return {C0, SM->getSCEVType()};
if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(SM->getOperand(1)))
return {C1, SM->getSCEVType()};
return {nullptr, scCouldNotCompute};
};
auto MergeMinMaxConst = [](MinMaxPattern P1,
MinMaxPattern P2) -> MinMaxPattern {
auto [C1, T1] = P1;
auto [C2, T2] = P2;
if (!C1 || !C2 || T1 != T2)
return {nullptr, scCouldNotCompute};
switch (T1) {
case scUMaxExpr:
return {C1->getAPInt().ult(C2->getAPInt()) ? C1 : C2, T1};
case scSMaxExpr:
return {C1->getAPInt().slt(C2->getAPInt()) ? C1 : C2, T1};
case scUMinExpr:
return {C1->getAPInt().ugt(C2->getAPInt()) ? C1 : C2, T1};
case scSMinExpr:
return {C1->getAPInt().sgt(C2->getAPInt()) ? C1 : C2, T1};
default:
llvm_unreachable("Trying to merge non-MinMaxExpr SCEVs.");
}
};
auto P = GetMinMaxConst(0);
for (unsigned int In = 1; In < Phi.getNumIncomingValues(); In++) {
if (!P.first)
break;
P = MergeMinMaxConst(P, GetMinMaxConst(In));
}
if (P.first) {
const SCEV *LHS = SE.getSCEV(const_cast<PHINode *>(&Phi));
SmallVector<const SCEV *, 2> Ops({P.first, LHS});
const SCEV *RHS = SE.getMinMaxExpr(P.second, Ops);
Guards.RewriteMap.insert({LHS, RHS});
}
}

void ScalarEvolution::LoopGuards::collectFromBlock(
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
const BasicBlock *Block, const BasicBlock *Pred,
SmallPtrSetImpl<const BasicBlock *> &VisitedBlocks, unsigned Depth) {
SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
const SCEV *RHS,
Expand Down Expand Up @@ -15556,14 +15634,13 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
}
};

BasicBlock *Header = L->getHeader();
SmallVector<PointerIntPair<Value *, 1, bool>> Terms;
// First, collect information from assumptions dominating the loop.
for (auto &AssumeVH : SE.AC.assumptions()) {
if (!AssumeVH)
continue;
auto *AssumeI = cast<CallInst>(AssumeVH);
if (!SE.DT.dominates(AssumeI, Header))
if (!SE.DT.dominates(AssumeI, Block))
continue;
Terms.emplace_back(AssumeI->getOperand(0), true);
}
Expand All @@ -15574,27 +15651,42 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
if (GuardDecl)
for (const auto *GU : GuardDecl->users())
if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
if (Guard->getFunction() == Header->getParent() &&
SE.DT.dominates(Guard, Header))
if (Guard->getFunction() == Block->getParent() &&
SE.DT.dominates(Guard, Block))
Terms.emplace_back(Guard->getArgOperand(0), true);

// Third, collect conditions from dominating branches. Starting at the loop
// predecessor, climb up the predecessor chain, as long as there are
// predecessors that can be found that have unique successors leading to the
// original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), Header);
Pair.first;
std::pair<const BasicBlock *, const BasicBlock *> Pair(Pred, Block);
for (; Pair.first;
Pair = SE.getPredecessorWithUniqueSuccessorForBB(Pair.first)) {

VisitedBlocks.insert(Pair.second);
const BranchInst *LoopEntryPredicate =
dyn_cast<BranchInst>(Pair.first->getTerminator());
if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional())
continue;

Terms.emplace_back(LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) == Pair.second);

// If we are recursively collecting guards stop after 2
// predecessors to limit compile-time impact for now.
if (Depth > 0 && Terms.size() == 2)
break;
}
// Finally, if we stopped climbing the predecessor chain because
// there wasn't a unique one to continue, try to collect conditions
// for PHINodes by recursively following all of their incoming
// blocks and try to merge the found conditions to build a new one
// for the Phi.
if (Pair.second->hasNPredecessorsOrMore(2) &&
Depth < MaxLoopGuardCollectionDepth) {
SmallDenseMap<const BasicBlock *, LoopGuards> IncomingGuards;
for (auto &Phi : Pair.second->phis())
collectFromPHI(SE, Guards, Phi, VisitedBlocks, IncomingGuards, Depth);
}

// Now apply the information from the collected conditions to
Expand Down Expand Up @@ -15651,7 +15743,6 @@ ScalarEvolution::LoopGuards::collect(const Loop *L, ScalarEvolution &SE) {
Guards.RewriteMap.insert({Expr, Guards.rewrite(RewriteTo)});
}
}
return Guards;
}

const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
Expand Down
Loading

0 comments on commit e403d25

Please sign in to comment.