Skip to content

Commit

Permalink
Revert "[VPlan] First step towards VPlan cost modeling. (#92555)"
Browse files Browse the repository at this point in the history
This reverts commit 0079835.

Causes crashes, see comments on #92555.
  • Loading branch information
aeubanks committed Jun 13, 2024
1 parent 8f2a4e8 commit 46080ab
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 398 deletions.
17 changes: 1 addition & 16 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;

/// Computes the cost of \p Plan for vectorization factor \p VF.
///
/// The current implementation requires access to the
/// LoopVectorizationLegality to handle inductions and reductions, which is
/// why it is kept separate from the VPlan-only cost infrastructure.
///
/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
/// been retired.
InstructionCost cost(VPlan &Plan, ElementCount VF) const;

public:
LoopVectorizationPlanner(
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
Expand All @@ -375,9 +365,6 @@ class LoopVectorizationPlanner {
/// Return the best VPlan for \p VF.
VPlan &getBestPlanFor(ElementCount VF) const;

/// Return the most profitable plan and fix its VF to the most profitable one.
VPlan &getBestPlan() const;

/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
///
Expand Down Expand Up @@ -456,9 +443,7 @@ class LoopVectorizationPlanner {
ElementCount MinVF);

/// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every VF in \p CandidateVFs. This is now only used to
/// verify the decisions by the new VPlan-based cost-model and will be retired
/// once the VPlan-based cost-model is stabilized.
/// This method checks every VF in \p CandidateVFs.
VectorizationFactor
selectVectorizationFactor(const ElementCountSet &CandidateVFs);

Expand Down
206 changes: 22 additions & 184 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
cl::desc("A flag that overrides the target's max interleave factor for "
"vectorized loops."));

cl::opt<unsigned> ForceTargetInstructionCost(
static cl::opt<unsigned> ForceTargetInstructionCost(
"force-target-instruction-cost", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's expected cost for "
"an instruction to a single constant value. Mostly "
Expand Down Expand Up @@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}

/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
///
/// TODO: We should use actual block probability here, if available. Currently,
/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }

/// Returns "best known" trip count for the specified loop \p L as defined by
/// the following procedure:
/// 1) Returns exact trip count if it is known.
Expand Down Expand Up @@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel {
/// \p VF is the vectorization factor chosen for the original loop.
bool isEpilogueVectorizationProfitable(const ElementCount VF) const;

/// Return the cost of instructions in an inloop reduction pattern, if I is
/// part of that pattern.
std::optional<InstructionCost>
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
TTI::TargetCostKind CostKind) const;

/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);

private:
unsigned NumPredStores = 0;

Expand All @@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel {
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);

/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);

/// The cost-computation logic from getInstructionCost which provides
/// the vector type as an output parameter.
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
Type *&VectorTy);

/// Return the cost of instructions in an inloop reduction pattern, if I is
/// part of that pattern.
std::optional<InstructionCost>
getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
TTI::TargetCostKind CostKind) const;

/// Calculate vectorization cost of memory instruction \p I.
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);

Expand Down Expand Up @@ -7280,10 +7288,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (!MaxFactors.hasVector())
return VectorizationFactor::Disabled();

// Select the optimal vectorization factor according to the legacy cost-model.
// This is now only used to verify the decisions by the new VPlan-based
// cost-model and will be retired once the VPlan-based cost-model is
// stabilized.
// Select the optimal vectorization factor.
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
if (!hasPlanWithVF(VF.Width)) {
Expand All @@ -7294,166 +7299,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VF;
}

InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
ElementCount VF) const {
return CM.getInstructionCost(UI, VF).first;
}

bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
SkipCostComputation.contains(UI);
}

InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
InstructionCost Cost = 0;
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);

// Cost modeling for inductions is inaccurate in the legacy cost model
// compared to the recipes that are generated. To match here initially during
// VPlan cost model bring up directly use the induction costs from the legacy
// cost model. Note that we do this as pre-processing; the VPlan may not have
// any recipes associated with the original induction increment instruction.
// We precompute the cost of both induction increment instructions that are
// represented by recipes and those that are not, to avoid distinguishing
// between them here, and skip all recipes that represent induction increments
// (the former case) later on, if they exist, to avoid counting them twice.
// TODO: Switch to more accurate costing based on VPlan.
for (const auto &[IV, _] : Legal->getInductionVars()) {
Instruction *IVInc = cast<Instruction>(
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
assert(!CostCtx.SkipCostComputation.contains(IVInc) &&
"Same IV increment for multiple inductions?");
CostCtx.SkipCostComputation.insert(IVInc);
InstructionCost InductionCost = CostCtx.getLegacyCost(IVInc, VF);
LLVM_DEBUG({
dbgs() << "Cost of " << InductionCost << " for VF " << VF
<< ":\n induction increment " << *IVInc << "\n";
IVInc->dump();
});
Cost += InductionCost;
}

/// Compute the cost of all exiting conditions of the loop using the legacy
/// cost model. This is to match the legacy behavior, which adds the cost of
/// all exit conditions. Note that this over-estimates the cost, as there will
/// be a single condition to control the vector loop.
SmallVector<BasicBlock *> Exiting;
CM.TheLoop->getExitingBlocks(Exiting);
// Add the cost of all exit conditions.
for (BasicBlock *EB : Exiting) {
auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
if (!Term)
continue;
if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
assert(!CostCtx.SkipCostComputation.contains(CondI) &&
"Condition already skipped?");
CostCtx.SkipCostComputation.insert(CondI);
Cost += CostCtx.getLegacyCost(CondI, VF);
}
}

// The legacy cost model has special logic to compute the cost of in-loop
// reductions, which may be smaller than the sum of all instructions involved
// in the reduction. For AnyOf reductions, VPlan codegen may remove the select
// which the legacy cost model uses to assign cost. Pre-compute their costs
// for now.
// TODO: Switch to costing based on VPlan once the logic has been ported.
for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
if (!CM.isInLoopReduction(RedPhi) &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind()))
continue;

// AnyOf reduction codegen may remove the select. To match the legacy cost
// model, pre-compute the cost for AnyOf reductions here.
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
auto *Select = cast<SelectInst>(*find_if(
RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
assert(!CostCtx.SkipCostComputation.contains(Select) &&
"reduction op visited multiple times");
CostCtx.SkipCostComputation.insert(Select);
auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
<< ":\n any-of reduction " << *Select << "\n");
Cost += ReductionCost;
continue;
}

const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
ChainOps.end());
// Also include the operands of instructions in the chain, as the cost-model
// may mark extends as free.
for (auto *ChainOp : ChainOps) {
for (Value *Op : ChainOp->operands()) {
if (auto *I = dyn_cast<Instruction>(Op))
ChainOpsAndOperands.insert(I);
}
}

// Pre-compute the cost for I, if it has a reduction pattern cost.
for (Instruction *I : ChainOpsAndOperands) {
auto ReductionCost = CM.getReductionPatternCost(
I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
if (!ReductionCost)
continue;

assert(!CostCtx.SkipCostComputation.contains(I) &&
"reduction op visited multiple times");
CostCtx.SkipCostComputation.insert(I);
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
<< ":\n in-loop reduction " << *I << "\n");
Cost += *ReductionCost;
}
}

// Now compute and add the VPlan-based cost.
Cost += Plan.cost(VF, CostCtx);
LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
return Cost;
}

VPlan &LoopVectorizationPlanner::getBestPlan() const {
// If there is a single VPlan with a single VF, return it directly.
VPlan &FirstPlan = *VPlans[0];
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
return FirstPlan;

VPlan *BestPlan = &FirstPlan;
ElementCount ScalarVF = ElementCount::getFixed(1);
assert(hasPlanWithVF(ScalarVF) &&
"More than a single plan/VF w/o any plan having scalar VF");

InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);

bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (ForceVectorization) {
// Ignore scalar width, because the user explicitly wants vectorization.
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
// evaluation.
BestFactor.Cost = InstructionCost::getMax();
}

for (auto &P : VPlans) {
for (ElementCount VF : P->vectorFactors()) {
if (VF.isScalar())
continue;
InstructionCost Cost = cost(*P, VF);
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
if (isMoreProfitable(CurrentFactor, BestFactor)) {
BestFactor = CurrentFactor;
BestPlan = &*P;
}
}
}
BestPlan->setVF(BestFactor.Width);
return *BestPlan;
}

VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
assert(count_if(VPlans,
[VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
Expand Down Expand Up @@ -10312,15 +10157,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);

VPlan &BestPlan = LVP.getBestPlan();
assert(size(BestPlan.vectorFactors()) == 1 &&
"Plan should have a single VF");
ElementCount Width = *BestPlan.vectorFactors().begin();
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
<< "\n");
assert(VF.Width == Width &&
"VPlan cost model and legacy cost model disagreed");
LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;

// Add metadata to disable runtime unrolling a scalar loop when there
Expand Down
Loading

0 comments on commit 46080ab

Please sign in to comment.