-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[VPlan] Make canonical IV part of the region #156262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
6391ca9
9361797
c709ab9
84bd5ef
bb0c0ac
5cb548e
93b05f2
959d673
00fa5fb
2c423d3
1503c71
8bc50a6
96f41f5
14f70e4
580865a
d103bef
cf165c5
1832282
05643fd
6c35c36
005b24c
102396a
320164a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4060,7 +4060,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, | |
| case VPDef::VPScalarIVStepsSC: | ||
| case VPDef::VPReplicateSC: | ||
| case VPDef::VPInstructionSC: | ||
| case VPDef::VPCanonicalIVPHISC: | ||
| case VPDef::VPVectorPointerSC: | ||
| case VPDef::VPVectorEndPointerSC: | ||
| case VPDef::VPExpandSCEVSC: | ||
|
|
@@ -8328,6 +8327,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( | |
| m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) && | ||
| "Did not find the canonical IV increment"); | ||
| cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags(); | ||
| LoopRegion->clearCanonicalIVNUW(); | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
|
|
@@ -8385,13 +8385,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( | |
| for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||
| auto *SingleDef = cast<VPSingleDefRecipe>(&R); | ||
| auto *UnderlyingValue = SingleDef->getUnderlyingValue(); | ||
| // Skip recipes that do not need transforming, including canonical IV, | ||
| // wide canonical IV and VPInstructions without underlying values. The | ||
| // Skip recipes that do not need transforming, including blends, | ||
| // widened canonical IV and VPInstructions without underlying values. The | ||
| // latter are added above for masking. | ||
| // FIXME: Migrate code relying on the underlying instruction from VPlan0 | ||
| // to construct recipes below to not use the underlying instruction. | ||
| if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>( | ||
| &R) || | ||
| if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) || | ||
| (isa<VPInstruction>(&R) && !UnderlyingValue)) | ||
| continue; | ||
| assert(isa<VPInstruction>(&R) && UnderlyingValue && "unsupported recipe"); | ||
|
|
@@ -8578,8 +8577,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { | |
| VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, | ||
| Builder, BlockMaskCache, nullptr /*LVer*/); | ||
| for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { | ||
| if (isa<VPCanonicalIVPHIRecipe>(&R)) | ||
| continue; | ||
| auto *HeaderR = cast<VPHeaderPHIRecipe>(&R); | ||
| RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); | ||
| } | ||
|
|
@@ -9334,8 +9331,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { | |
| SmallPtrSet<PHINode *, 2> EpiWidenedPhis; | ||
| for (VPRecipeBase &R : | ||
| EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { | ||
| if (isa<VPCanonicalIVPHIRecipe>(&R)) | ||
| continue; | ||
| EpiWidenedPhis.insert( | ||
| cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue())); | ||
| } | ||
|
|
@@ -9396,10 +9391,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { | |
| VPPhi *ResumePhi = nullptr; | ||
| if (ResumePhiIter == MainScalarPH->phis().end()) { | ||
| VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); | ||
| Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC); | ||
| ResumePhi = ScalarPHBuilder.createScalarPhi( | ||
| {VectorTC, | ||
| MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()}, | ||
| {}, "vec.epilog.resume.val"); | ||
| {VectorTC, MainPlan.getConstantInt(Ty, 0)}, {}, | ||
| "vec.epilog.resume.val"); | ||
| } else { | ||
| ResumePhi = cast<VPPhi>(&*ResumePhiIter); | ||
| if (MainScalarPH->begin() == MainScalarPH->end()) | ||
|
|
@@ -9426,7 +9421,6 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
| VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); | ||
| Header->setName("vec.epilog.vector.body"); | ||
|
|
||
| VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV(); | ||
| // When vectorizing the epilogue loop, the canonical induction needs to be | ||
| // adjusted by the value after the main vector loop. Find the resume value | ||
| // created during execution of the main VPlan. It must be the first phi in the | ||
|
|
@@ -9456,6 +9450,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
| EPI.VectorTripCount = EPResumeVal->getOperand(0); | ||
| } | ||
| VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: better rename |
||
| VPValue *IV = VectorLoop->getCanonicalIV(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This replaces an
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep originally part of the loop |
||
| assert(all_of(IV->users(), | ||
| [](const VPUser *U) { | ||
| return isa<VPScalarIVStepsRecipe>(U) || | ||
|
|
@@ -9474,9 +9469,8 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
| DenseMap<Value *, Value *> ToFrozen; | ||
| SmallVector<Instruction *> InstsToMove; | ||
| // Ensure that the start values for all header phi recipes are updated before | ||
| // vectorizing the epilogue loop. Skip the canonical IV, which has been | ||
| // handled above. | ||
| for (VPRecipeBase &R : drop_begin(Header->phis())) { | ||
| // vectorizing the epilogue loop. | ||
| for (VPRecipeBase &R : Header->phis()) { | ||
| Value *ResumeV = nullptr; | ||
| // TODO: Move setting of resume values to prepareToExecute. | ||
| if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -129,10 +129,14 @@ void VPDef::dump() const { | |
| #endif | ||
|
|
||
| VPRecipeBase *VPValue::getDefiningRecipe() { | ||
| if (SubclassID == VPRegionValueSC) | ||
| return nullptr; | ||
| return cast_or_null<VPRecipeBase>(Def); | ||
| } | ||
|
|
||
| const VPRecipeBase *VPValue::getDefiningRecipe() const { | ||
| if (SubclassID == VPRegionValueSC) | ||
| return nullptr; | ||
| return cast_or_null<VPRecipeBase>(Def); | ||
| } | ||
|
|
||
|
|
@@ -731,10 +735,13 @@ VPRegionBlock *VPRegionBlock::clone() { | |
| VPRegionBlock *NewRegion = | ||
| isReplicator() | ||
| ? Plan.createReplicateRegion(NewEntry, NewExiting, getName()) | ||
| : Plan.createLoopRegion(getName(), NewEntry, NewExiting); | ||
| : Plan.createLoopRegion(CanIVInfo->getType(), | ||
| CanIVInfo->getDebugLoc(), getName(), NewEntry, | ||
| NewExiting); | ||
|
|
||
| for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) | ||
| Block->setParent(NewRegion); | ||
|
|
||
| return NewRegion; | ||
| } | ||
|
|
||
|
|
@@ -819,6 +826,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |
| VPSlotTracker &SlotTracker) const { | ||
| O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; | ||
| auto NewIndent = Indent + " "; | ||
| if (!isReplicator()) { | ||
| O << '\n'; | ||
| getCanonicalIV()->print(O, SlotTracker); | ||
| O << " = CANONICAL-IV\n"; | ||
| } | ||
| for (auto *BlockBase : vp_depth_first_shallow(Entry)) { | ||
| O << '\n'; | ||
| BlockBase->print(O, NewIndent, SlotTracker); | ||
|
|
@@ -831,18 +843,29 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |
|
|
||
| void VPRegionBlock::dissolveToCFGLoop() { | ||
| auto *Header = cast<VPBasicBlock>(getEntry()); | ||
| if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) { | ||
| assert(this == getPlan()->getVectorLoopRegion() && | ||
| "Canonical IV must be in the entry of the top-level loop region"); | ||
| auto *ScalarR = VPBuilder(CanIV).createScalarPhi( | ||
| {CanIV->getStartValue(), CanIV->getBackedgeValue()}, | ||
| CanIV->getDebugLoc(), "index"); | ||
| auto *ExitingLatch = cast<VPBasicBlock>(getExiting()); | ||
| VPValue *CanIV = getCanonicalIV(); | ||
| if (CanIV && CanIV->getNumUsers() > 0) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth documenting somewhere that CanIV should continue to exist (until loop region dissolves) even w/o users, i.e., avoid dce, as its existence indicates region loopness, and provides other information such as type; plus users might conceptually be introduced late(?)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, added to |
||
| VPlan &Plan = *getPlan(); | ||
| VPInstruction *CanIVInc = getCanonicalIVIncrement(); | ||
| // If the increment doesn't exist yet, create it. | ||
| if (!CanIVInc) { | ||
| auto *ExitingTerm = ExitingLatch->getTerminator(); | ||
| CanIVInc = | ||
| VPBuilder(ExitingTerm) | ||
| .createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()}, | ||
| {CanIVInfo->hasNUW(), /* HasNSW */ false}, | ||
| CanIVInfo->getDebugLoc(), "index.next"); | ||
| } | ||
| auto *ScalarR = | ||
| VPBuilder(Header, Header->begin()) | ||
| .createScalarPhi( | ||
| {Plan.getConstantInt(CanIVInfo->getType(), 0), CanIVInc}, | ||
| CanIVInfo->getDebugLoc(), "index"); | ||
| CanIV->replaceAllUsesWith(ScalarR); | ||
| CanIV->eraseFromParent(); | ||
| } | ||
|
|
||
| VPBlockBase *Preheader = getSinglePredecessor(); | ||
| auto *ExitingLatch = cast<VPBasicBlock>(getExiting()); | ||
| VPBlockBase *Middle = getSingleSuccessor(); | ||
| VPBlockUtils::disconnectBlocks(Preheader, this); | ||
| VPBlockUtils::disconnectBlocks(this, Middle); | ||
|
|
@@ -855,6 +878,25 @@ void VPRegionBlock::dissolveToCFGLoop() { | |
| VPBlockUtils::connectBlocks(ExitingLatch, Header); | ||
| } | ||
|
|
||
| VPInstruction *VPRegionBlock::getCanonicalIVIncrement() { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Follow-up: represent CanIVInc as a region VPValue too?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, added a TODO |
||
| // TODO: Represent the increment as VPRegionValue as well. | ||
| auto *ExitingLatch = cast<VPBasicBlock>(getExiting()); | ||
| VPValue *CanIV = getCanonicalIV(); | ||
| assert(CanIV && "Expected a canonical IV"); | ||
|
|
||
| auto *ExitingTerm = ExitingLatch->getTerminator(); | ||
| VPInstruction *CanIVInc = nullptr; | ||
| if (match(ExitingTerm, | ||
| m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) { | ||
| assert(match(CanIVInc, | ||
| m_c_Add(m_CombineOr(m_Specific(CanIV), | ||
| m_c_Add(m_Specific(CanIV), m_LiveIn())), | ||
| m_VPValue())) && | ||
| "invalid existing IV increment"); | ||
| } | ||
|
Comment on lines
+889
to
+896
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok to only have an assert under an if?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep I think it should be fine, alternative is capturing the result of the match in a variable, marking it as maybe unusued and use that in the assert |
||
| return CanIVInc; | ||
| } | ||
|
|
||
| VPlan::VPlan(Loop *L) { | ||
| setEntry(createVPIRBasicBlock(L->getLoopPreheader())); | ||
| ScalarHeader = createVPIRBasicBlock(L->getHeader()); | ||
|
|
@@ -879,7 +921,11 @@ VPlan::~VPlan() { | |
| for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) | ||
| R.setOperand(I, &DummyValue); | ||
| } | ||
| } else if (!cast<VPRegionBlock>(VPB)->isReplicator()) { | ||
| cast<VPRegionBlock>(VPB)->getCanonicalIV()->replaceAllUsesWith( | ||
| &DummyValue); | ||
| } | ||
|
|
||
| delete VPB; | ||
| } | ||
| for (VPValue *VPV : getLiveIns()) | ||
|
|
@@ -1187,6 +1233,14 @@ VPlan *VPlan::duplicate() { | |
| // else NewTripCount will be created and inserted into Old2NewVPValues when | ||
| // TripCount is cloned. In any case NewPlan->TripCount is updated below. | ||
|
|
||
| if (auto *LoopRegion = getVectorLoopRegion()) { | ||
| assert(LoopRegion->getCanonicalIV() && | ||
| NewPlan->getVectorLoopRegion()->getCanonicalIV() && | ||
| "Loop regions of both plans must have canonical IVs."); | ||
| Old2NewVPValues[LoopRegion->getCanonicalIV()] = | ||
| NewPlan->getVectorLoopRegion()->getCanonicalIV(); | ||
| } | ||
|
|
||
| remapOperands(Entry, NewEntry, Old2NewVPValues); | ||
|
|
||
| // Initialize remaining fields of cloned VPlan. | ||
|
|
@@ -1367,6 +1421,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { | |
| /// Returns true if there is a vector loop region and \p VPV is defined in a | ||
| /// loop region. | ||
| static bool isDefinedInsideLoopRegions(const VPValue *VPV) { | ||
| if (isa<VPRegionValue>(VPV)) | ||
| return true; | ||
| const VPRecipeBase *DefR = VPV->getDefiningRecipe(); | ||
| return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || | ||
| DefR->getParent()->getEnclosingLoopRegion()); | ||
|
|
@@ -1476,9 +1532,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) { | |
|
|
||
| ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> | ||
| RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); | ||
| for (const VPBasicBlock *VPBB : | ||
| VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT)) | ||
| assignNames(VPBB); | ||
| for (const VPBlockBase *VPB : RPOT) { | ||
| if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) | ||
| assignNames(VPBB); | ||
| else if (!cast<VPRegionBlock>(VPB)->isReplicator()) | ||
| assignName(cast<VPRegionBlock>(VPB)->getCanonicalIV()); | ||
|
Comment on lines
+1538
to
+1539
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can also introduce |
||
| } | ||
| } | ||
|
|
||
| void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replace "including canonical IV," above with "including blends,"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks