diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 25bf49db0e073..c1ddb1049c421 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4035,7 +4035,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, case VPDef::VPScalarIVStepsSC: case VPDef::VPReplicateSC: case VPDef::VPInstructionSC: - case VPDef::VPCanonicalIVPHISC: case VPDef::VPVectorPointerSC: case VPDef::VPVectorEndPointerSC: case VPDef::VPExpandSCEVSC: @@ -8276,6 +8275,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) && "Did not find the canonical IV increment"); cast(IVInc)->dropPoisonGeneratingFlags(); + LoopRegion->clearCanonicalIVNUW(); } // --------------------------------------------------------------------------- @@ -8338,8 +8338,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // latter are added above for masking. // FIXME: Migrate code relying on the underlying instruction from VPlan0 // to construct recipes below to not use the underlying instruction. - if (isa( - &R) || + if (isa(&R) || (isa(&R) && !UnderlyingValue)) continue; assert(isa(&R) && UnderlyingValue && "unsupported recipe"); @@ -8525,8 +8524,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, Builder, BlockMaskCache, nullptr /*LVer*/); for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (isa(&R)) - continue; auto *HeaderR = cast(&R); RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); } @@ -9284,8 +9281,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { SmallPtrSet EpiWidenedPhis; for (VPRecipeBase &R : EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (isa(&R)) - continue; EpiWidenedPhis.insert( cast(R.getVPSingleValue()->getUnderlyingValue())); } @@ -9346,10 +9341,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { VPPhi *ResumePhi = nullptr; if (ResumePhiIter == MainScalarPH->phis().end()) { VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); + Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC); ResumePhi = ScalarPHBuilder.createScalarPhi( - {VectorTC, - MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()}, - {}, "vec.epilog.resume.val"); + {VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {}, + "vec.epilog.resume.val"); } else { ResumePhi = cast(&*ResumePhiIter); if (MainScalarPH->begin() == MainScalarPH->end()) @@ -9376,7 +9371,6 @@ static SmallVector preparePlanForEpilogueVectorLoop( VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); - VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV(); // When vectorizing the epilogue loop, the canonical induction needs to be // adjusted by the value after the main vector loop. Find the resume value // created during execution of the main VPlan. 
It must be the first phi in the @@ -9406,6 +9400,7 @@ static SmallVector preparePlanForEpilogueVectorLoop( EPI.VectorTripCount = EPResumeVal->getOperand(0); } VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal); + VPValue *IV = VectorLoop->getCanonicalIV(); assert(all_of(IV->users(), [](const VPUser *U) { return isa(U) || @@ -9424,9 +9419,8 @@ static SmallVector preparePlanForEpilogueVectorLoop( DenseMap ToFrozen; SmallVector InstsToMove; // Ensure that the start values for all header phi recipes are updated before - // vectorizing the epilogue loop. Skip the canonical IV, which has been - // handled above. - for (VPRecipeBase &R : drop_begin(Header->phis())) { + // vectorizing the epilogue loop. + for (VPRecipeBase &R : Header->phis()) { Value *ResumeV = nullptr; // TODO: Move setting of resume values to prepareToExecute. if (auto *ReductionPhi = dyn_cast(&R)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 428a8f4c1348f..e13538d1b0394 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -103,20 +103,20 @@ VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) VPValue::~VPValue() { assert(Users.empty() && "trying to delete a VPValue with remaining users"); - if (Def) - Def->removeDefinedValue(this); + if (VPDef *D = getDefiningRecipe()) + D->removeDefinedValue(this); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { - if (const VPRecipeBase *R = dyn_cast_or_null(Def)) + if (const VPRecipeBase *R = getDefiningRecipe()) R->print(OS, "", SlotTracker); else printAsOperand(OS, SlotTracker); } void VPValue::dump() const { - const VPRecipeBase *Instr = dyn_cast_or_null(this->Def); + const VPRecipeBase *Instr = getDefiningRecipe(); VPSlotTracker SlotTracker( (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); print(dbgs(), SlotTracker); @@ -133,10 +133,14 @@ void VPDef::dump() const { #endif VPRecipeBase *VPValue::getDefiningRecipe() { + if (SubclassID == VPRegionValueSC) + return nullptr; return cast_or_null(Def); } const VPRecipeBase *VPValue::getDefiningRecipe() const { + if (SubclassID == VPRegionValueSC) + return nullptr; return cast_or_null(Def); } @@ -746,10 +750,13 @@ VPRegionBlock *VPRegionBlock::clone() { VPRegionBlock *NewRegion = isReplicator() ? Plan.createReplicateRegion(NewEntry, NewExiting, getName()) - : Plan.createLoopRegion(getName(), NewEntry, NewExiting); + : Plan.createLoopRegion(CanIVInfo->getType(), + CanIVInfo->getDebugLoc(), getName(), NewEntry, + NewExiting); for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) Block->setParent(NewRegion); + return NewRegion; } @@ -834,6 +841,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << (isReplicator() ? 
" " : " ") << getName() << ": {"; auto NewIndent = Indent + " "; + if (!isReplicator()) { + O << '\n'; + getCanonicalIV()->print(O, SlotTracker); + O << " = CANONICAL-IV\n"; + } for (auto *BlockBase : vp_depth_first_shallow(Entry)) { O << '\n'; BlockBase->print(O, NewIndent, SlotTracker); @@ -846,18 +858,30 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, void VPRegionBlock::dissolveToCFGLoop() { auto *Header = cast(getEntry()); - if (auto *CanIV = dyn_cast(&Header->front())) { - assert(this == getPlan()->getVectorLoopRegion() && - "Canonical IV must be in the entry of the top-level loop region"); - auto *ScalarR = VPBuilder(CanIV).createScalarPhi( - {CanIV->getStartValue(), CanIV->getBackedgeValue()}, - CanIV->getDebugLoc(), "index"); + auto *ExitingLatch = cast(getExiting()); + VPValue *CanIV = getCanonicalIV(); + if (CanIV && CanIV->getNumUsers() > 0) { + VPlan &Plan = *getPlan(); + VPInstruction *CanIVInc = getCanonicalIVIncrement(); + // If the increment doesn't exist yet, create it. + if (!CanIVInc) { + auto *ExitingTerm = ExitingLatch->getTerminator(); + CanIVInc = + VPBuilder(ExitingTerm) + .createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()}, + {CanIVInfo->hasNUW(), /* HasNSW */ false}, + CanIVInfo->getDebugLoc(), "index.next"); + } + auto *ScalarR = + VPBuilder(Header, Header->begin()) + .createScalarPhi( + {Plan.getOrAddLiveIn(ConstantInt::get(CanIVInfo->getType(), 0)), + CanIVInc}, + CanIVInfo->getDebugLoc(), "index"); CanIV->replaceAllUsesWith(ScalarR); - CanIV->eraseFromParent(); } VPBlockBase *Preheader = getSinglePredecessor(); - auto *ExitingLatch = cast(getExiting()); VPBlockBase *Middle = getSingleSuccessor(); VPBlockUtils::disconnectBlocks(Preheader, this); VPBlockUtils::disconnectBlocks(this, Middle); @@ -870,6 +894,24 @@ void VPRegionBlock::dissolveToCFGLoop() { VPBlockUtils::connectBlocks(ExitingLatch, Header); } +VPInstruction *VPRegionBlock::getCanonicalIVIncrement() { + auto *ExitingLatch = cast(getExiting()); + VPValue *CanIV = getCanonicalIV(); + assert(CanIV && "Expected a canonical IV"); + + auto *ExitingTerm = ExitingLatch->getTerminator(); + VPInstruction *CanIVInc = nullptr; + if (match(ExitingTerm, + m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) { + assert(match(CanIVInc, + m_c_Add(m_CombineOr(m_Specific(CanIV), + m_c_Add(m_Specific(CanIV), m_LiveIn())), + m_VPValue())) && + "invalid existing IV increment"); + } + return CanIVInc; +} + VPlan::VPlan(Loop *L) { setEntry(createVPIRBasicBlock(L->getLoopPreheader())); ScalarHeader = createVPIRBasicBlock(L->getHeader()); @@ -894,7 +936,11 @@ VPlan::~VPlan() { for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) R.setOperand(I, &DummyValue); } + } else if (!cast(VPB)->isReplicator()) { + cast(VPB)->getCanonicalIV()->replaceAllUsesWith( + &DummyValue); } + delete VPB; } for (VPValue *VPV : getLiveIns()) @@ -1202,6 +1248,11 @@ VPlan *VPlan::duplicate() { // else NewTripCount will be created and inserted into Old2NewVPValues when // TripCount is cloned. In any case NewPlan->TripCount is updated below. + if (auto *LoopRegion = getVectorLoopRegion()) { + Old2NewVPValues[LoopRegion->getCanonicalIV()] = + NewPlan->getVectorLoopRegion()->getCanonicalIV(); + } + remapOperands(Entry, NewEntry, Old2NewVPValues); // Initialize remaining fields of cloned VPlan. @@ -1382,6 +1433,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { /// Returns true if there is a vector loop region and \p VPV is defined in a /// loop region. 
static bool isDefinedInsideLoopRegions(const VPValue *VPV) { + if (isa(VPV)) + return true; const VPRecipeBase *DefR = VPV->getDefiningRecipe(); return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || DefR->getParent()->getEnclosingLoopRegion()); @@ -1491,9 +1544,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) { ReversePostOrderTraversal> RPOT(VPBlockDeepTraversalWrapper(Plan.getEntry())); - for (const VPBasicBlock *VPBB : - VPBlockUtils::blocksOnly(RPOT)) - assignNames(VPBB); + for (const VPBlockBase *VPB : RPOT) { + if (auto *VPBB = dyn_cast(VPB)) + assignNames(VPBB); + else if (!cast(VPB)->isReplicator()) + assignName(cast(VPB)->getCanonicalIV()); + } } void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1504acfcf7e52..ca36f80849544 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -546,7 +546,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenSelectSC: case VPRecipeBase::VPBlendSC: case VPRecipeBase::VPPredInstPHISC: - case VPRecipeBase::VPCanonicalIVPHISC: case VPRecipeBase::VPActiveLaneMaskPHISC: case VPRecipeBase::VPFirstOrderRecurrencePHISC: case VPRecipeBase::VPWidenPHISC: @@ -1960,12 +1959,6 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags, /// the backedge is the second operand. /// /// Inductions are modeled using the following sub-classes: -/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop, -/// starting at a specified value (zero for the main vector loop, the resume -/// value for the epilogue vector loop) and stepping by 1. The induction -/// controls exiting of the vector loop by comparing against the vector trip -/// count. Produces a single scalar PHI for the induction value per -/// iteration. /// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and /// floating point inductions with arbitrary start and step values. Produces /// a vector PHI per-part. @@ -3464,63 +3457,6 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe { const SCEV *getSCEV() const { return Expr; } }; -/// Canonical scalar induction phi of the vector loop. Starting at the specified -/// start value (either 0 or the resume value when vectorizing the epilogue -/// loop). VPWidenCanonicalIVRecipe represents the vector version of the -/// canonical induction variable. -class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { -public: - VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL) - : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {} - - ~VPCanonicalIVPHIRecipe() override = default; - - VPCanonicalIVPHIRecipe *clone() override { - auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc()); - R->addOperand(getBackedgeValue()); - return R; - } - - VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC) - - void execute(VPTransformState &State) override { - llvm_unreachable("cannot execute this recipe, should be replaced by a " - "scalar phi recipe"); - } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - /// Returns the scalar type of the induction. - Type *getScalarType() const { - return getStartValue()->getLiveInIRValue()->getType(); - } - - /// Returns true if the recipe only uses the first lane of operand \p Op. 
- bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } - - /// Returns true if the recipe only uses the first part of operand \p Op. - bool onlyFirstPartUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - return true; - } - - /// Return the cost of this VPCanonicalIVPHIRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override { - // For now, match the behavior of the legacy cost model. - return 0; - } -}; - /// A recipe for generating the active lane mask for the vector loop that is /// used to predicate the vector operations. /// TODO: It would be good to use the existing VPWidenPHIRecipe instead and @@ -3599,14 +3535,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe, public VPUnrollPartAccessor<1> { public: - VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) + VPWidenCanonicalIVRecipe(VPValue *CanonicalIV) : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} ~VPWidenCanonicalIVRecipe() override = default; VPWidenCanonicalIVRecipe *clone() override { - return new VPWidenCanonicalIVRecipe( - cast(getOperand(0))); + return new VPWidenCanonicalIVRecipe(getOperand(0)); } VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC) @@ -3645,8 +3580,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, - const Twine &Name = "") + VPValue *CanonicalIV, VPValue *Step, const Twine &Name = "") : VPDerivedIVRecipe( IndDesc.getKind(), dyn_cast_or_null(IndDesc.getInductionBinOp()), @@ -3992,6 +3926,32 @@ class VPIRBasicBlock : public VPBasicBlock { BasicBlock *getIRBasicBlock() const { return IRBB; } }; +/// Track information about the canonical IV value of a region. +class VPCanonicalIVInfo { + /// VPRegionValue for the canonical IV. The allocation is managed by + /// VPCanonicalIVInfo. + std::unique_ptr CanIV; + Type *Ty = nullptr; + bool HasNUW = true; + DebugLoc DL = DebugLoc::getUnknown(); + +public: + VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region, + bool HasNUW = true) + : CanIV(std::make_unique(Region)), Ty(Ty), HasNUW(HasNUW), + DL(DL) {} + + VPCanonicalIVInfo *clone(VPRegionBlock *Region) const { + return new VPCanonicalIVInfo(Ty, DL, Region, HasNUW); + } + + VPRegionValue *getVPValue() { return CanIV.get(); } + Type *getType() const { return Ty; } + DebugLoc getDebugLoc() const { return DL; } + bool hasNUW() const { return HasNUW; } + void clearNUW() { HasNUW = false; } +}; + /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. /// A VPRegionBlock may indicate that its contents are to be replicated several @@ -4010,26 +3970,33 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase { /// VPRegionBlock. VPBlockBase *Exiting; - /// An indicator whether this region is to generate multiple replicated - /// instances of output IR corresponding to its VPBlockBases. - bool IsReplicator; + /// Holds the Canonical IV of the loop region along with additional + /// information. If CanIV is nullptr, the region is a replicating region. + VPCanonicalIVInfo *CanIVInfo = nullptr; /// Use VPlan::createVPRegionBlock to create VPRegionBlocks. 
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, - const std::string &Name = "", bool IsReplicator = false) - : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting), - IsReplicator(IsReplicator) { - assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); - assert(Exiting->getSuccessors().empty() && "Exit block has successors."); - Entry->setParent(this); - Exiting->setParent(this); - } - VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) - : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), - IsReplicator(IsReplicator) {} + const std::string &Name = "") + : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) { + if (Entry) { + assert(Entry->getPredecessors().empty() && + "Entry block has predecessors."); + Entry->setParent(this); + } + if (Exiting) { + assert(Exiting->getSuccessors().empty() && "Exit block has successors."); + Exiting->setParent(this); + } + } + + VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry, + VPBlockBase *Exiting, const std::string &Name = "") + : VPRegionBlock(Entry, Exiting, Name) { + CanIVInfo = new VPCanonicalIVInfo(CanIVTy, DL, this); + } public: - ~VPRegionBlock() override {} + ~VPRegionBlock() override { delete CanIVInfo; } /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPBlockBase *V) { @@ -4068,7 +4035,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase { /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. - bool isReplicator() const { return IsReplicator; } + bool isReplicator() const { return !CanIVInfo; } /// The method which generates the output IR instructions that correspond to /// this VPRegionBlock, thereby "executing" the VPlan. @@ -4097,24 +4064,23 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase { /// its entry, and its exiting block to its successor. void dissolveToCFGLoop(); - /// Returns the canonical induction recipe of the region. - VPCanonicalIVPHIRecipe *getCanonicalIV() { - VPBasicBlock *EntryVPBB = getEntryBasicBlock(); - if (EntryVPBB->empty()) { - // VPlan native path. TODO: Unify both code paths. - EntryVPBB = cast(EntryVPBB->getSingleSuccessor()); - } - return cast(&*EntryVPBB->begin()); - } - const VPCanonicalIVPHIRecipe *getCanonicalIV() const { - return const_cast(this)->getCanonicalIV(); - } + /// Get the canonical IV increment instruction. If the exiting terminator + /// is a BranchOnCount with an IV increment, return it. Otherwise, create + /// a new IV increment and return it. + VPInstruction *getCanonicalIVIncrement(); - /// Return the type of the canonical IV for loop regions. - Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); } - const Type *getCanonicalIVType() const { - return getCanonicalIV()->getScalarType(); - } + /// Return the canonical induction variable of the region, null for + /// replicating regions. 
+ VPValue *getCanonicalIV() { return CanIVInfo->getVPValue(); } + const VPValue *getCanonicalIV() const { return CanIVInfo->getVPValue(); } + + Type *getCanonicalIVType() const { return CanIVInfo->getType(); } + + const VPCanonicalIVInfo &getCanonicalIVInfo() const { return *CanIVInfo; } + + DebugLoc getCanonicalIVDebugLoc() const { return CanIVInfo->getDebugLoc(); } + bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); } + void clearCanonicalIVNUW() { CanIVInfo->clearNUW(); } }; inline VPRegionBlock *VPRecipeBase::getRegion() { @@ -4448,11 +4414,11 @@ class VPlan { /// Create a new loop region with \p Name and entry and exiting blocks set /// to \p Entry and \p Exiting respectively, if set. The returned block is /// owned by the VPlan and deleted once the VPlan is destroyed. - VPRegionBlock *createLoopRegion(const std::string &Name = "", + VPRegionBlock *createLoopRegion(Type *CanIVTy, DebugLoc DL, + const std::string &Name = "", VPBlockBase *Entry = nullptr, VPBlockBase *Exiting = nullptr) { - auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name) - : new VPRegionBlock(Name); + auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name); CreatedBlocks.push_back(VPB); return VPB; } @@ -4462,7 +4428,7 @@ class VPlan { /// destroyed. VPRegionBlock *createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name = "") { - auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true); + auto *VPB = new VPRegionBlock(Entry, Exiting, Name); CreatedBlocks.push_back(VPB); return VPB; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 80a2e4bc3f754..ab456e0ffb0c7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -23,14 +23,6 @@ using namespace llvm; #define DEBUG_TYPE "vplan" VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) { - if (auto LoopRegion = Plan.getVectorLoopRegion()) { - if (const auto *CanIV = dyn_cast( - &LoopRegion->getEntryBasicBlock()->front())) { - CanonicalIVTy = CanIV->getScalarType(); - return; - } - } - // If there's no canonical IV, retrieve the type from the trip count // expression. auto *TC = Plan.getTripCount(); @@ -272,18 +264,20 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { return CanonicalIVTy; } + if (auto *CanIV = dyn_cast(V)) + return CanonicalIVTy; + Type *ResultTy = TypeSwitch(V->getDefiningRecipe()) - .Case( - [this](const auto *R) { - // Handle header phi recipes, except VPWidenIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + .Case([this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. + return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) .Case llvm::calculateRegisterUsageForPlan( // FIXME: Might need some motivation why these values are ignored. If // for example an argument is used inside the loop it will increase the // register pressure (so shouldn't we add it to LoopInvariants). 
- if (!DefR && (!U->getLiveInIRValue() || - !isa(U->getLiveInIRValue()))) + if (!isa(U) && !DefR && + (!U->getLiveInIRValue() || + !isa(U->getLiveInIRValue()))) continue; // If this recipe is outside the loop then record it and continue. - if (!DefR) { + if (!DefR && !isa(U)) { LoopInvariants.insert(U); continue; } @@ -501,6 +496,10 @@ SmallVector llvm::calculateRegisterUsageForPlan( return TTICapture.getRegUsageForType(VectorType::get(Ty, VF)); }; + VPValue *CanIV = LoopRegion->getCanonicalIV(); + if (CanIV->getNumUsers() != 0) + OpenIntervals.insert(CanIV); + // We scan the instructions linearly and record each time that a new interval // starts, by placing it in a set. If we find this value in TransposEnds then // we remove it from the set. The max register usage is the maximum register @@ -546,7 +545,7 @@ SmallVector llvm::calculateRegisterUsageForPlan( continue; if (VFs[J].isScalar() || - isa(VPV) || (isa(VPV) && vputils::onlyScalarValuesUsed(VPV)) || (isa(VPV) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 65688a3f0b6be..4394ded7639ae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -402,17 +402,32 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor(); assert(LatchExitVPB && "Latch expected to be left with a single successor"); + VPPhi *ScalarCanIV = nullptr; + Type *CanIVTy = nullptr; + DebugLoc DL = DebugLoc::getCompilerGenerated(); + // Get type info and debug location from the scalar phi corresponding to the + // canonical IV for outermost loops. + if (PreheaderVPBB->getSinglePredecessor() == Plan.getEntry()) { + ScalarCanIV = cast(&*cast(HeaderVPB)->begin()); + CanIVTy = ScalarCanIV->getOperand(0)->getLiveInIRValue()->getType(); + DL = ScalarCanIV->getDebugLoc(); + } + // Create an empty region first and insert it between PreheaderVPBB and // LatchExitVPB, taking care to preserve the original predecessor & successor // order of blocks. Set region entry and exiting after both HeaderVPB and // LatchVPBB have been disconnected from their predecessors/successors. - auto *R = Plan.createLoopRegion(); + auto *R = Plan.createLoopRegion(CanIVTy, DL); VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R); VPBlockUtils::disconnectBlocks(LatchVPBB, R); VPBlockUtils::connectBlocks(PreheaderVPBB, R); R->setEntry(HeaderVPB); R->setExiting(LatchVPBB); + if (ScalarCanIV) { + ScalarCanIV->replaceAllUsesWith(R->getCanonicalIV()); + ScalarCanIV->eraseFromParent(); + } // All VPBB's reachable shallowly from HeaderVPB belong to the current region. for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB)) VPBB->setParent(R); @@ -425,9 +440,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, DebugLoc DL) { Value *StartIdx = ConstantInt::get(IdxTy, 0); auto *StartV = Plan.getOrAddLiveIn(StartIdx); - - // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. - auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); + auto *CanonicalIVPHI = new VPPhi(StartV, DL); HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin()); // We are about to replace the branch to exit the region. 
Remove the original @@ -445,8 +458,6 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, auto *CanonicalIVIncrement = Builder.createOverflowingOp( Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL, "index.next"); - CanonicalIVPHI->addOperand(CanonicalIVIncrement); - // Add the BranchOnCount VPInstruction to the latch. Builder.createNaryOp(VPInstruction::BranchOnCount, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, @@ -662,7 +673,7 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, VPBuilder(CheckBlockVPBB) .createNaryOp( VPInstruction::BranchOnCond, {CondVPV}, - Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc()); + Plan.getVectorLoopRegion()->getCanonicalIVInfo().getDebugLoc()); if (AddBranchWeights) { MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = @@ -827,7 +838,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { VPReductionPHIRecipe *RedPhiR = nullptr; bool HasUnsupportedPhi = false; for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) { - if (isa(&R)) + if (isa(&R)) continue; auto *Cur = dyn_cast(&R); if (!Cur) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b5b98c64543e4..a17b0ef52498c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -312,7 +312,6 @@ struct Recipe_match { auto *DefR = dyn_cast(R); // Check for recipes that do not have opcodes. if constexpr (std::is_same_v || - std::is_same_v || std::is_same_v) return DefR; else diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f9c15a31167fa..5044ad7f15813 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -68,7 +68,6 @@ bool VPRecipeBase::mayWriteToMemory() const { ->onlyReadsMemory(); case VPWidenIntrinsicSC: return cast(this)->mayWriteToMemory(); - case VPCanonicalIVPHISC: case VPBranchOnMaskSC: case VPDerivedIVSC: case VPFirstOrderRecurrencePHISC: @@ -4279,16 +4278,6 @@ InstructionCost VPInterleaveBase::computeCost(ElementCount VF, 0); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EMIT "; - printAsOperand(O, SlotTracker); - O << " = CANONICAL-INDUCTION "; - printOperands(O, SlotTracker); -} -#endif - bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { return IsScalarAfterVectorization && (!IsScalable || vputils::onlyFirstLaneUsed(this)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d491d5669ef18..867e87a8d6979 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -520,7 +520,7 @@ static void removeRedundantInductionCasts(VPlan &Plan) { /// recipe, if it exists. 
static void removeRedundantCanonicalIVs(VPlan &Plan) { VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); + VPValue *CanonicalIV = LoopRegion->getCanonicalIV(); VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; for (VPUser *U : CanonicalIV->users()) { WidenNewIV = dyn_cast(U); @@ -603,7 +603,7 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, VPBuilder &Builder) { VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); - VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); + VPValue *CanonicalIV = LoopRegion->getCanonicalIV(); VPSingleDefRecipe *BaseIV = Builder.createDerivedIV( Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx"); @@ -1603,9 +1603,10 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, }); auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV(); - if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ, - m_Specific(CanIV->getBackedgeValue()), - m_Specific(&Plan.getVectorTripCount())))) + if (!match(Cond, m_SpecificICmp( + CmpInst::ICMP_EQ, + m_c_Add(m_Specific(CanIV), m_Specific(&Plan.getVFxUF())), + m_Specific(&Plan.getVectorTripCount())))) return false; // The compare checks CanIV + VFxUF == vector trip count. The vector trip @@ -1764,8 +1765,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, if (all_of(Header->phis(), [](VPRecipeBase &Phi) { if (auto *R = dyn_cast(&Phi)) return R->isCanonical(); - return isa(&Phi); + return isa(&Phi); })) { for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) { if (auto *R = dyn_cast(&HeaderR)) { @@ -1780,6 +1781,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0)); HeaderR.eraseFromParent(); } + VectorRegion->getCanonicalIV()->replaceAllUsesWith(Plan.getOrAddLiveIn( + ConstantInt::getNullValue(VectorRegion->getCanonicalIVType()))); VPBlockBase *Preheader = VectorRegion->getSinglePredecessor(); VPBlockBase *Exit = VectorRegion->getSingleSuccessor(); @@ -2364,15 +2367,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) { VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); - auto *CanonicalIVPHI = TopRegion->getCanonicalIV(); - VPValue *StartV = CanonicalIVPHI->getStartValue(); - - auto *CanonicalIVIncrement = - cast(CanonicalIVPHI->getBackedgeValue()); + VPValue *CanonicalIV = TopRegion->getCanonicalIV(); + VPValue *StartV = Plan.getOrAddLiveIn( + Constant::getNullValue(TopRegion->getCanonicalIVType())); + auto *CanonicalIVIncrement = TopRegion->getCanonicalIVIncrement(); // TODO: Check if dropping the flags is needed if // !DataAndControlFlowWithoutRuntimeCheck. CanonicalIVIncrement->dropPoisonGeneratingFlags(); - DebugLoc DL = CanonicalIVIncrement->getDebugLoc(); + TopRegion->clearCanonicalIVNUW(); + DebugLoc DL = TopRegion->getCanonicalIVDebugLoc(); // We can't use StartV directly in the ActiveLaneMask VPInstruction, since // we have to take unrolling into account. 
Each part needs to start at // Part * VF @@ -2393,7 +2396,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // When avoiding a runtime check, the active.lane.mask inside the loop // uses a modified trip count and the induction variable increment is // done after the active.lane.mask intrinsic is called. - IncrementValue = CanonicalIVPHI; + IncrementValue = CanonicalIV; TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF, {TC}, DL); } @@ -2412,7 +2415,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // preheader ActiveLaneMask instruction. auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc::getUnknown()); - LaneMaskPhi->insertAfter(CanonicalIVPHI); + auto *HeaderVPBB = TopRegion->getEntryBasicBlock(); + LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin()); // Create the active lane mask for the next iteration of the loop before the // original terminator. @@ -2489,8 +2493,9 @@ void VPlanTransforms::addActiveLaneMask( "UseActiveLaneMaskForControlFlow"); VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - auto *FoundWidenCanonicalIVUser = find_if( - LoopRegion->getCanonicalIV()->users(), IsaPred); + auto *CanonicalIV = LoopRegion->getCanonicalIV(); + auto *FoundWidenCanonicalIVUser = + find_if(CanonicalIV->users(), IsaPred); assert(FoundWidenCanonicalIVUser && "Must have widened canonical IV when tail folding!"); VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan); @@ -2726,7 +2731,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and /// replaces all uses except the canonical IV increment of -/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe +/// VPCanonicalIV with a VPEVLBasedIVPHIRecipe. VPCanonicalIV /// is used only for loop iterations counting after this transformation. /// /// The function uses the following definitions: @@ -2774,13 +2779,15 @@ void VPlanTransforms::addExplicitVectorLength( VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); - auto *CanonicalIVPHI = LoopRegion->getCanonicalIV(); - auto *CanIVTy = LoopRegion->getCanonicalIVType(); - VPValue *StartV = CanonicalIVPHI->getStartValue(); + auto *CanonicalIV = LoopRegion->getCanonicalIV(); + auto &CanIVInfo = LoopRegion->getCanonicalIVInfo(); + auto *CanIVTy = CanIVInfo.getType(); + VPValue *StartV = Plan.getOrAddLiveIn(ConstantInt::getNullValue(CanIVTy)); + auto *CanonicalIVIncrement = LoopRegion->getCanonicalIVIncrement(); // Create the ExplicitVectorLengthPhi recipe in the main loop. auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc::getUnknown()); - EVLPhi->insertAfter(CanonicalIVPHI); + EVLPhi->insertBefore(*Header, Header->begin()); VPBuilder Builder(Header, Header->getFirstNonPhi()); // Create the AVL (application vector length), starting from TC -> 0 in steps // of EVL. 
@@ -2799,8 +2806,6 @@ void VPlanTransforms::addExplicitVectorLength( auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL, DebugLoc::getUnknown()); - auto *CanonicalIVIncrement = - cast(CanonicalIVPHI->getBackedgeValue()); Builder.setInsertPoint(CanonicalIVIncrement); VPValue *OpVPEVL = VPEVL; @@ -2810,8 +2815,7 @@ void VPlanTransforms::addExplicitVectorLength( auto *NextEVLIV = Builder.createOverflowingOp( Instruction::Add, {OpVPEVL, EVLPhi}, - {CanonicalIVIncrement->hasNoUnsignedWrap(), - CanonicalIVIncrement->hasNoSignedWrap()}, + {CanIVInfo.hasNUW(), /*HasNSW=*/false}, CanonicalIVIncrement->getDebugLoc(), "index.evl.next"); EVLPhi->addOperand(NextEVLIV); @@ -2822,10 +2826,10 @@ void VPlanTransforms::addExplicitVectorLength( transformRecipestoEVLRecipes(Plan, *VPEVL); - // Replace all uses of VPCanonicalIVPHIRecipe by + // Replace all uses of VPCanonicalIV by // VPEVLBasedIVPHIRecipe except for the canonical IV increment. - CanonicalIVPHI->replaceAllUsesWith(EVLPhi); - CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); + CanonicalIV->replaceAllUsesWith(EVLPhi); + CanonicalIVIncrement->setOperand(0, CanonicalIV); // TODO: support unroll factor > 1. Plan.setUF(1); } @@ -2876,15 +2880,15 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) { // Replace CanonicalIVInc with EVL-PHI increment. auto *CanonicalIV = cast(&*HeaderVPBB->begin()); VPValue *Backedge = CanonicalIV->getIncomingValue(1); - assert(match(Backedge, m_c_Add(m_Specific(CanonicalIV), - m_Specific(&Plan.getVFxUF()))) && - "Unexpected canonical iv"); - Backedge->replaceAllUsesWith(EVLIncrement); - - // Remove unused phi and increment. - VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe(); - CanonicalIVIncrement->eraseFromParent(); - CanonicalIV->eraseFromParent(); + if (match(Backedge, + m_c_Add(m_Specific(CanonicalIV), m_Specific(&Plan.getVFxUF())))) { + Backedge->replaceAllUsesWith(EVLIncrement); + + // Remove unused phi and increment. + VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe(); + CanonicalIVIncrement->eraseFromParent(); + CanonicalIV->eraseFromParent(); + } // Replace the use of VectorTripCount in the latch-exiting block. // Before: (branch-on-count EVLIVInc, VectorTripCount) @@ -3929,7 +3933,7 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (isa(&R)) + VPDerivedIVRecipe>(&R)) continue; for (VPValue *Def : R.definedValues()) { // Skip recipes that are single-scalar or only have their first lane @@ -4176,7 +4180,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, unsigned VFMinVal = VF.getKnownMinValue(); SmallVector StoreGroups; for (auto &R : *VectorLoop->getEntryBasicBlock()) { - if (isa(&R) || match(&R, m_BranchOnCount())) + if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) continue; if (isa(&R) && @@ -4331,8 +4335,9 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, // Adjust induction to reflect that the transformed plan only processes one // original iteration. 
- auto *CanIV = VectorLoop->getCanonicalIV(); - auto *Inc = cast(CanIV->getBackedgeValue()); + Type *CanIVTy = VectorLoop->getCanonicalIVInfo().getType(); + auto *Inc = cast( + VectorLoop->getExitingBasicBlock()->getTerminator()->getOperand(0)); VPBuilder PHBuilder(Plan.getVectorPreheader()); VPValue *UF = Plan.getOrAddLiveIn( @@ -4346,7 +4351,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, } else { Inc->setOperand(1, UF); Plan.getVF().replaceAllUsesWith( - Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1))); + Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 1))); } removeDeadRecipes(Plan); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index f15113c6293bc..1c2d36a3b5933 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -79,7 +79,7 @@ class UnrollState { void unrollBlock(VPBlockBase *VPB); VPValue *getValueForPart(VPValue *V, unsigned Part) { - if (Part == 0 || V->isLiveIn()) + if (Part == 0 || V->isLiveIn() || isa(V)) return V; assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) && "accessed value does not exist"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 8c23e78693db5..eedc0a22be132 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -83,17 +83,19 @@ const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, return SE.getCouldNotCompute(); } + if (auto *CanIV = dyn_cast(V)) { + if (!L) + return SE.getCouldNotCompute(); + const SCEV *Start = + SE.getZero(CanIV->getDefiningRegion()->getCanonicalIVType()); + return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L, + SCEV::FlagAnyWrap); + } + // TODO: Support constructing SCEVs for more recipes as needed. return TypeSwitch(V->getDefiningRecipe()) .Case( [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) - .Case([&SE, L](const VPCanonicalIVPHIRecipe *R) { - if (!L) - return SE.getCouldNotCompute(); - const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); - return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L, - SCEV::FlagAnyWrap); - }) .Case([&SE, L](const VPDerivedIVRecipe *R) { const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L); @@ -204,10 +206,7 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { return all_of(R->operands(), isUniformAcrossVFsAndUFs); } - auto *CanonicalIV = - R->getParent()->getEnclosingLoopRegion()->getCanonicalIV(); - // Canonical IV chain is uniform. - if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue()) + if (isa(V)) return true; return TypeSwitch(R) diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 83e3fcaaeee2b..c12ab104a20ca 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -40,6 +40,8 @@ class VPUser; class VPRecipeBase; class VPInterleaveBase; class VPPhiAccessors; +struct VPRegionValue; +class VPRegionBlock; /// This is the base class of the VPlan Def/Use graph, used for modeling the /// data flow into, within and out of the VPlan. 
VPValues can stand for live-ins @@ -51,6 +53,7 @@ class LLVM_ABI_FOR_TEST VPValue { friend class VPInterleaveBase; friend class VPlan; friend class VPExpressionRecipe; + friend struct VPRegionValue; const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -60,9 +63,13 @@ class LLVM_ABI_FOR_TEST VPValue { /// Hold the underlying Value, if any, attached to this VPValue. Value *UnderlyingVal; - /// Pointer to the VPDef that defines this VPValue. If it is nullptr, the - /// VPValue is not defined by any recipe modeled in VPlan. - VPDef *Def; + /// Pointer to the VPDef that defines this VPValue, or VPRegionBlock for + /// VPRegionValue. If it is nullptr, the VPValue is not defined by any recipe + /// or region in VPlan, i.e. is a live-in. + union { + VPDef *Def; + VPRegionBlock *DefiningRegion; + }; VPValue(const unsigned char SC, Value *UV = nullptr, VPDef *Def = nullptr); @@ -89,7 +96,9 @@ class LLVM_ABI_FOR_TEST VPValue { enum { VPValueSC, /// A generic VPValue, like live-in values or defined by a recipe /// that defines multiple values. - VPVRecipeSC /// A VPValue sub-class that is a VPRecipeBase. + VPVRecipeSC, /// A VPValue sub-class that is a VPRecipeBase. + VPRegionValueSC, /// A VPValue sub-class that is defined by a region, like + /// the canonical IV of a loop region. }; VPValue(const VPValue &) = delete; @@ -165,10 +174,15 @@ class LLVM_ABI_FOR_TEST VPValue { const VPRecipeBase *getDefiningRecipe() const; /// Returns true if this VPValue is defined by a recipe. - bool hasDefiningRecipe() const { return getDefiningRecipe(); } + bool hasDefiningRecipe() const { + return SubclassID == VPVRecipeSC || + (SubclassID == VPValueSC && getDefiningRecipe()); + } /// Returns true if this VPValue is a live-in, i.e. defined outside the VPlan. - bool isLiveIn() const { return !hasDefiningRecipe(); } + bool isLiveIn() const { + return SubclassID == VPValueSC && !getDefiningRecipe(); + } /// Returns the underlying IR value, if this VPValue is defined outside the /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef @@ -189,6 +203,22 @@ class LLVM_ABI_FOR_TEST VPValue { } }; +/// VPValues defined by a VPRegionBlock, like the canonical IV. +struct VPRegionValue : public VPValue { + VPRegionValue(VPRegionBlock *Region) : VPValue(VPValue::VPRegionValueSC) { + DefiningRegion = Region; + } + + ~VPRegionValue() override = default; + + /// Returns the region that defines this value. + VPRegionBlock *getDefiningRegion() const { return DefiningRegion; } + + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPRegionValueSC; + } +}; + typedef DenseMap Value2VPValueTy; typedef DenseMap VPValue2ValueTy; @@ -364,7 +394,6 @@ class VPDef { VPPredInstPHISC, // START: SubclassID for recipes that inherit VPHeaderPHIRecipe. // VPHeaderPHIRecipe need to be kept together. 
- VPCanonicalIVPHISC, VPActiveLaneMaskPHISC, VPEVLBasedIVPHISC, VPFirstOrderRecurrencePHISC, @@ -374,7 +403,7 @@ class VPDef { // END: SubclassID for recipes that inherit VPHeaderPHIRecipe // END: Phi-like recipes VPFirstPHISC = VPWidenPHISC, - VPFirstHeaderPHISC = VPCanonicalIVPHISC, + VPFirstHeaderPHISC = VPActiveLaneMaskPHISC, VPLastHeaderPHISC = VPReductionPHISC, VPLastPHISC = VPReductionPHISC, }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 91734a10cb2c8..2d7ff0529b4b4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -465,12 +465,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) { return false; } - if (!isa(&*Entry->begin())) { - errs() << "VPlan vector loop header does not start with a " - "VPCanonicalIVPHIRecipe\n"; - return false; - } - const VPBasicBlock *Exiting = dyn_cast(TopRegion->getExiting()); if (!Exiting) { errs() << "VPlan exiting block is not a VPBasicBlock\n"; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f16351720b20f..451e5f9857b46 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -580,10 +580,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[TMP13:%.*]] = or [[BROADCAST_SPLAT]], splat (i16 1) ; PRED-NEXT: [[TMP14:%.*]] = uitofp [[TMP13]] to ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP14]], ptr align 8 [[NEXT_GEP]], [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] @@ -660,16 +660,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; COMMON: [[PRED_STORE_IF13]]: ; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 ; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; COMMON: [[PRED_STORE_CONTINUE14]]: +; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br label %[[EXIT:.*]] ; COMMON: [[EXIT]]: -; COMMON-NEXT: br label %[[SCALAR_PH:.*]] -; COMMON: [[SCALAR_PH]]: -; COMMON-NEXT: br label %[[EXIT1:.*]] -; COMMON: [[EXIT1]]: ; COMMON-NEXT: ret void ; entry: @@ -1049,11 +1049,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: store float 0.000000e+00, ptr [[TMP83]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE27]] ; PRED: [[PRED_STORE_CONTINUE27]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: 
[[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]]) ; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br [[EXIT:label %.*]] @@ -1345,10 +1345,10 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: [[TMP26:%.*]] = fptoui [[TMP25]] to ; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[C]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP26]], ptr align 1 [[TMP27]], [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) ; PRED-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br [[EXIT:label %.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 72e813b62025f..30d5b3f82a178 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -140,11 +140,11 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( zeroinitializer, ptr align 8 [[TMP34]], [[TMP23]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP35:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -257,11 +257,11 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP37:%.*]] = ashr i64 [[TMP36]], 32 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP37]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP23]], ptr align 4 [[TMP38]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP39:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], 
!llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll index faee4c1194018..7283838aa8006 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll @@ -25,10 +25,10 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-UF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-UF1-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP4]], <4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-UF1-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-UF1-NEXT: [[TMP6:%.*]] = xor i1 [[TMP5]], true +; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-UF1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UF1: middle.block: ; @@ -63,7 +63,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP17]], <4 x i1> [[ACTIVE_LANE_MASK4]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP18]], <4 x i1> [[ACTIVE_LANE_MASK5]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP19]], <4 x i1> [[ACTIVE_LANE_MASK6]]) -; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP12]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 12) ; CHECK-UF4-NEXT: [[TMP11]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 8) @@ -71,6 +70,7 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: [[TMP9]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) ; CHECK-UF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 ; CHECK-UF4-NEXT: [[TMP20:%.*]] = xor i1 [[TMP21]], true +; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UF4: middle.block: ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll index 199203a9f5cb0..1a481b349feda 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll @@ -10,10 +10,10 @@ define i64 @test(ptr %a, ptr %b) #0 { ; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] ; CHECK-NEXT: Cost of 1 for VF 8: 
exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 8: WIDEN-REDUCTION-PHI ir<{{.+}}> = phi vp<{{.+}}>, ir<{{.+}}> ; CHECK: Cost for VF 8: 30 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK-NEXT: Cost of 0 for VF 16: WIDEN-REDUCTION-PHI ir<{{.+}}> = phi vp<{{.+}}>, ir<{{.+}}> ; CHECK: Cost for VF 16: 56 ; CHECK: LV: Selecting VF: 16 entry: @@ -44,11 +44,9 @@ define i64 @test_external_iv_user(ptr %a, ptr %b) #0 { ; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 8: 30 ; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 16: 57 ; CHECK: LV: Selecting VF: vscale x 2 entry: @@ -81,11 +79,9 @@ define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 { ; CHECK-NEXT: Cost of 1 for VF 8: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] ; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 8: 27 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ] ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 1 for VF 16: EXPRESSION vp<%11> = ir<%sum> + partial.reduce.add (mul nuw nsw (ir<%1> zext to i64), (ir<%0> zext to i64)) ; CHECK: Cost for VF 16: 3 ; CHECK: LV: Selecting VF: 16 @@ -118,10 +114,8 @@ define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef ; CHECK: Cost of 4 for VF 8: induction instruction %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16 -; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 8: 12 ; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost for VF 16: 4 ; CHECK: LV: Selecting VF: 16 entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index cfc6cc87a2a21..29b807c3e9cf7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -156,10 +156,10 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP17:%.*]] = trunc <vscale x 16 x i16> [[TMP16]] to <vscale x 16 x i8> ; PRED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP17]], ptr align 1 [[TMP18]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP19:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP20:%.*]] = xor i1 [[TMP19]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; PRED-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] @@ -322,11 +322,11 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 1, ptr [[TMP23]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; PRED: [[PRED_STORE_CONTINUE2]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]]) ; PRED-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] @@ -510,11 +510,11 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE7]] ; PRED: [[PRED_STORE_CONTINUE7]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]]) ; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] @@ -699,11 +699,11 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; PRED: [[PRED_STORE_CONTINUE6]]: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]]) ; PRED-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP35:%.*]] = xor i1 [[TMP34]], true ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ;
PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 157b78704234a..ebd8fda6bce59 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -64,7 +64,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] ; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] +; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]] ; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 ; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1 ; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer @@ -83,10 +83,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8 ; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.continue2: -; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]]) ; TFCOMMON-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; TFCOMMON-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true +; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; TFCOMMON-NEXT: br i1 [[TMP17]], label [[END:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] ; TFCOMMON: end: ; TFCOMMON-NEXT: ret void @@ -105,7 +105,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]] ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 ; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer @@ -138,12 +138,12 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.continue8: -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]]) ; 
TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[TMP27]], i64 [[TMP3]]) ; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor i1 [[TMP26]], true +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP28]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFA_INTERLEAVE: end: ; TFA_INTERLEAVE-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 31453e9509ea3..858c4f6fb5173 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -967,11 +967,11 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: store double [[PREDPHI3]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label %[[TMP9]] ; TFA_INTERLEAVE: [[TMP9]]: -; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT6]] = icmp ult i64 [[TMP20]], [[TMP3]] ; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = xor i1 [[ACTIVE_LANE_MASK_NEXT]], true +; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP21]], label %[[END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFA_INTERLEAVE: [[END]]: ; TFA_INTERLEAVE-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index f50d0834c5dc8..be0aaea4a1893 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -351,10 +351,10 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] ; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -473,12 +473,12 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP21:%.*]] = add [[TMP18]], [[TMP20]] ; DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; DEFAULT-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr align 1 [[TMP22]], [[ACTIVE_LANE_MASK]]) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; DEFAULT-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], 
[[DOTSPLAT]] -; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: @@ -523,12 +523,12 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP21:%.*]] = add [[TMP18]], [[TMP20]] ; OPTSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; OPTSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr align 1 [[TMP22]], [[ACTIVE_LANE_MASK]]) -; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; OPTSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; OPTSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; OPTSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; OPTSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: @@ -573,12 +573,12 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP21:%.*]] = add [[TMP18]], [[TMP20]] ; MINSIZE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] ; MINSIZE-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP21]], ptr align 1 [[TMP22]], [[ACTIVE_LANE_MASK]]) -; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; MINSIZE-NEXT: [[TMP24:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; MINSIZE-NEXT: [[TMP23:%.*]] = xor i1 [[TMP24]], true ; MINSIZE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; MINSIZE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; MINSIZE: [[FOR_COND_CLEANUP]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 4636c1b63da82..38f5b24abd04b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -1346,10 +1346,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = add [[TMP17]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[TMP19]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], [[VEC_PHI]] -; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], 
i64 [[TMP9]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true +; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) @@ -1384,10 +1384,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = add [[TMP17]], [[VEC_PHI]] ; CHECK-INTERLEAVED-NEXT: [[TMP19]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], [[VEC_PHI]] -; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP19]]) @@ -1422,10 +1422,10 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = mul nsw [[TMP16]], [[TMP13]] ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP18]]) -; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = xor i1 [[TMP19]], true +; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[PARTIAL_REDUCE]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 44ae1757ce6e6..c93cac2df7546 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -233,10 +233,10 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP39:%.*]] = or [[WIDE_MASKED_GATHER]], [[VEC_PHI]] ; PRED-NEXT: [[TMP40:%.*]] = or [[TMP39]], [[WIDE_MASKED_GATHER7]] ; PRED-NEXT: [[TMP41]] = select [[ACTIVE_LANE_MASK]], [[TMP40]], [[VEC_PHI]] -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[IV]], i64 [[TMP10]]) ; PRED-NEXT: [[TMP43:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP42:%.*]] = xor i1 [[TMP43]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP2]] ; PRED-NEXT: br i1 
[[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP41]]) @@ -455,11 +455,11 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP20:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] ; PRED-NEXT: [[TMP21:%.*]] = or [[TMP20]], [[VEC_PHI]] ; PRED-NEXT: [[TMP16]] = select [[ACTIVE_LANE_MASK]], [[TMP21]], [[VEC_PHI]] -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP12]]) ; PRED-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true -; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] +; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP19:%.*]] = call i16 @llvm.vector.reduce.or.nxv8i16( [[TMP16]]) ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index d84463430179d..a69ab68c67f59 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -137,10 +137,10 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr align 4 [[TMP7]], [[ACTIVE_LANE_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP8]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -362,7 +362,6 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP28:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP26]], [[TMP27]]) ; CHECK-ORDERED-TF-NEXT: [[TMP29:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP30]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP28]], [[TMP29]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], [[TMP32]] @@ -378,6 +377,7 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call 
@llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP39]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = xor i1 [[TMP40]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -602,10 +602,10 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP16]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP15]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -817,10 +817,10 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = fadd [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP10]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP7]]) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] @@ -1020,10 +1020,10 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[WIDE_MASKED_LOAD1]], splat (float 3.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -1449,7 +1449,6 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], [[TMP41]]) ; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP36]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], [[TMP43]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]] @@ -1465,6 +1464,7 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] @@ -1745,7 +1745,6 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP40]], [[TMP41]]) ; CHECK-ORDERED-TF-NEXT: [[TMP43:%.*]] = select nnan [[ACTIVE_LANE_MASK8]], [[TMP36]], splat (float -0.000000e+00) ; CHECK-ORDERED-TF-NEXT: [[TMP44]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP42]], [[TMP43]]) -; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[TMP45:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP46:%.*]] = shl nuw i64 [[TMP45]], 3 ; CHECK-ORDERED-TF-NEXT: [[TMP47:%.*]] = add i64 [[INDEX]], [[TMP46]] @@ -1761,6 +1760,7 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true +; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll index 9a831690d632d..144fa8001bb4c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll @@ -32,10 +32,10 @@ define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP9]], ptr align 4 [[TMP11]], [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT_B]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP10]], ptr align 4 [[TMP12]], 
[[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -91,10 +91,10 @@ define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP9]], ptr align 8 [[TMP11]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[OUT_B]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr align 8 [[TMP12]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -168,10 +168,10 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP17]], ptr align 4 [[TMP19]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[OUT_B]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 0d8a1021bd438..f9e6a705db707 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -90,10 +90,10 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> zeroinitializer, ptr align 1 [[TMP7]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; PRED-NEXT: [[TMP8:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true +; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: br i1
[[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 02cc499f18827..0e568af2eea65 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -109,10 +109,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP15]], [[TMP18]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP9]], [[TMP9]]) ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP17]], [[INTERLEAVED_MASK3]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP19]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -240,10 +240,10 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg [[TMP13]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP14]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP15]], [[TMP12]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -375,10 +375,10 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg [[TMP15]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP16]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP17]], [[TMP14]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] 
= add [[VEC_IND]], [[BROADCAST_SPLAT4]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] @@ -536,10 +536,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave4.nxv64i8( [[TMP17]], [[TMP18]], [[TMP19]], [[TMP20]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP9]], [[TMP9]], [[TMP9]], [[TMP9]]) ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr align 1 [[TMP22]], [[INTERLEAVED_MASK3]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 8108320fd54ab..4c54511feeaab 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -11,7 +11,6 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-LABEL: Checking a loop in 'simple_memset' ; VPLANS: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { ; VPLANS-NEXT: Live-in vp<[[VF:%.+]]> = VF -; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; VPLANS: vp<[[TC:%[0-9]+]]> = original trip-count ; VPLANS-EMPTY: ; VPLANS-NEXT: ir-bb: @@ -21,20 +20,20 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-NEXT: vector.ph: ; VPLANS-NEXT: EMIT vp<[[NEWTC:%[0-9]+]]> = TC > VF ? 
TC - VF : 0 vp<[[TC]]> ; VPLANS-NEXT: EMIT vp<[[VF_PER_PART:%.+]]> = VF * Part + ir<0> -; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%.+]]> = active lane mask vp<[[VF_PER_PART]]>, vp<[[TC]]> +; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%.+]]> = active lane mask vp<[[VF_PER_PART]]>, vp<[[TC]]>, ir<1> ; VPLANS-NEXT: Successor(s): vector loop ; VPLANS-EMPTY: ; VPLANS-NEXT: vector loop: { +; VPLANS-NEXT: vp<[[INDV:%[0-9]+]]> = CANONICAL-IV +; VPLANS-EMPTY: ; VPLANS-NEXT: vector.body: -; VPLANS-NEXT: EMIT vp<[[INDV:%[0-9]+]]> = CANONICAL-INDUCTION ; VPLANS-NEXT: ACTIVE-LANE-MASK-PHI vp<[[LANEMASK_PHI:%[0-9]+]]> = phi vp<[[LANEMASK_ENTRY]]>, vp<[[LANEMASK_LOOP:%.+]]> ; VPLANS-NEXT: vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1>, vp<[[VF]]> ; VPLANS-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEP]]> ; VPLANS-NEXT: vp<[[VEC_PTR:%[0-9]+]]> = vector-pointer ir<%gep> ; VPLANS-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%val>, vp<[[LANEMASK_PHI]]> -; VPLANS-NEXT: EMIT vp<[[INDV_UPDATE:%.+]]> = add vp<[[INDV]]>, vp<[[VFxUF]]> ; VPLANS-NEXT: EMIT vp<[[INC:%[0-9]+]]> = VF * Part + vp<[[INDV]]> -; VPLANS-NEXT: EMIT vp<[[LANEMASK_LOOP]]> = active lane mask vp<[[INC]]>, vp<[[NEWTC]]> +; VPLANS-NEXT: EMIT vp<[[LANEMASK_LOOP]]> = active lane mask vp<[[INC]]>, vp<[[NEWTC]]>, ir<1> ; VPLANS-NEXT: EMIT vp<[[NOT:%[0-9]+]]> = not vp<[[LANEMASK_LOOP]]> ; VPLANS-NEXT: EMIT branch-on-cond vp<[[NOT]]> ; VPLANS-NEXT: No successors @@ -62,10 +61,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP11]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index 3b8625eb76711..8d89a0c2822c7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -87,9 +87,9 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 42) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label
[[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index ae7c9d263c179..9aefc85fbbcdf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -29,10 +29,10 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP14]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) @@ -63,10 +63,10 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], <vscale x 4 x i32> zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]]) ; CHECK-IN-LOOP-NEXT: [[TMP15]] = add i32 [[VEC_PHI]], [[TMP14]] -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = xor i1 [[TMP18]], true +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] @@ -113,10 +113,10 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison) ; CHECK-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> splat (float -0.000000e+00) ; CHECK-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[TMP13]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -145,10 +145,10 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x float> poison) ;
CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], splat (float -0.000000e+00) ; CHECK-IN-LOOP-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] @@ -199,10 +199,10 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP17:%.*]] = xor [[VEC_PHI]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP20]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP16]], true +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP20]]) @@ -236,10 +236,10 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = select [[TMP15]], [[WIDE_MASKED_LOAD1]], zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP17]]) ; CHECK-IN-LOOP-NEXT: [[TMP19]] = xor i32 [[VEC_PHI]], [[TMP18]] -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = xor i1 [[TMP22]], true +; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] ; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-IN-LOOP: middle.block: ; CHECK-IN-LOOP-NEXT: br label [[FOR_INC:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index c8ecb7f864521..37a4117f26dad 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -53,7 +53,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP54]], [[ACTIVE_LANE_MASK7]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP57]], [[ACTIVE_LANE_MASK8]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP60]], [[ACTIVE_LANE_MASK9]]) -; CHECK-NEXT: [[INDEX_NEXT10]] = add i64 [[INDEX6]], [[TMP62]] 
; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP64:%.*]] = shl nuw i64 [[TMP63]], 2 ; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX6]], [[TMP64]] @@ -69,6 +68,7 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT13]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP71]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true +; CHECK-NEXT: [[INDEX_NEXT10]] = add i64 [[INDEX6]], [[TMP62]] ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -161,7 +161,6 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP76]], <vscale x 4 x i1> [[TMP70]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP79]], <vscale x 4 x i1> [[TMP71]]) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP82]], <vscale x 4 x i1> [[TMP72]]) -; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP6]] ; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP86:%.*]] = shl nuw i64 [[TMP85]], 2 ; CHECK-NEXT: [[TMP87:%.*]] = add i64 [[INDEX6]], [[TMP86]] @@ -177,6 +176,7 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP66:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP67:%.*]] = xor i1 [[TMP66]], true +; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 945d808d3fa3f..d193624e9c8ae 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -26,10 +26,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP11]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -70,10 +70,10 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ;
CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP4]], <4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[TMP6]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], 4 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -118,10 +118,10 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[INDEX1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[WIDE_MASKED_LOAD]], ptr align 4 [[TMP13]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP12]], true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -178,11 +178,11 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP19]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.*]], <vscale x 4 x i64> [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> align 4 [[TMP20]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -231,10 +231,10 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP13]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> align 4 [[TMP14]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]],
true +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] @@ -285,10 +285,10 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP12]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP14]], true +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] @@ -336,81 +336,14 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX1]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP11]], [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP14:%.*]] = icmp ne [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[BROADCAST_SPLAT]], [[TMP15]], poison) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], [[WIDE_MASKED_GATHER]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX1]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr align 4 [[TMP16]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; CHECK-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; - -entry: - br label %for.body - -for.body: ; preds = %entry, %if.end - %index = phi i64 [ %index.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %index - %0 = load i32, ptr %arrayidx, align 4 - %tobool.not = icmp eq i32 %0, 0 - br i1 %tobool.not, label %if.end, label %if.then - -if.then: ; preds = %for.body - %1 = load i32, ptr %src, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body - %val.0 = phi i32 [ %1, %if.then ], [ 0, %for.body ] - %arrayidx1 = getelementptr inbounds i32, ptr %dst, i64 %index - store i32 %val.0, ptr %arrayidx1, align 4 - %index.next = add nuw 
i64 %index, 1 - %exitcond.not = icmp eq i64 %index.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %for.inc, %entry - ret void -} - - -; The original loop had an unconditional uniform store. Let's make sure -; we don't artificially create new predicated blocks for the load. -define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #0 { -; CHECK-LABEL: @uniform_store( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N:%.*]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP11]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP16]], [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_LOAD]], align 4 [[BROADCAST_SPLAT]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: @@ -458,11 +391,11 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr align 4 [[TMP12]], [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP15:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP15]], ptr align 4 [[TMP12]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true 
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: @@ -514,11 +447,11 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], splat (i32 1) ; CHECK-NEXT: [[TMP16:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP15]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP16]], ptr align 4 [[TMP12]], [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = xor i1 [[TMP14]], true -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP1]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] ; CHECK: while.end.loopexit: @@ -561,7 +494,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll index f2e3b708d7820..c784657687272 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll @@ -27,10 +27,10 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF1-NEXT: [[TMP6:%.*]] = mul [[WIDE_MASKED_LOAD]], splat (i8 3) ; CHECK-UF1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-UF1-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP6]], ptr align 1 [[TMP13]], [[ACTIVE_LANE_MASK]]) -; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-UF1-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-UF1-NEXT: [[TMP11:%.*]] = xor i1 [[TMP14]], true +; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] ; CHECK-UF1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UF1: middle.block: ; @@ -90,7 +90,6 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP26]], ptr align 1 
[[TMP39]], [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP27]], ptr align 1 [[TMP42]], [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP28]], ptr align 1 [[TMP45]], [[ACTIVE_LANE_MASK8]]) -; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP58]] = call @llvm.vector.extract.nxv16i1.nxv64i1( [[ACTIVE_LANE_MASK_NEXT]], i64 48) ; CHECK-UF4-NEXT: [[TMP57]] = call @llvm.vector.extract.nxv16i1.nxv64i1( [[ACTIVE_LANE_MASK_NEXT]], i64 32) @@ -98,6 +97,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: [[TMP55]] = call @llvm.vector.extract.nxv16i1.nxv64i1( [[ACTIVE_LANE_MASK_NEXT]], i64 0) ; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement [[TMP55]], i32 0 ; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor i1 [[TMP59]], true +; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] ; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-UF4: middle.block: ; @@ -145,10 +145,10 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF1-NEXT: [[TMP3:%.*]] = fmul [[WIDE_MASKED_LOAD]], splat (double 3.000000e+00) ; CHECK-UF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-UF1-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP3]], ptr align 8 [[TMP8]], [[ACTIVE_LANE_MASK]]) -; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-UF1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP13]]) ; CHECK-UF1-NEXT: [[TMP7:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-UF1-NEXT: [[TMP6:%.*]] = xor i1 [[TMP7]], true +; CHECK-UF1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-UF1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-UF1: middle.block: ; @@ -211,7 +211,6 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP17]], ptr align 8 [[TMP34]], [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP18]], ptr align 8 [[TMP37]], [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP19]], ptr align 8 [[TMP40]], [[ACTIVE_LANE_MASK8]]) -; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP53]] = call @llvm.vector.extract.nxv2i1.nxv8i1( [[ACTIVE_LANE_MASK_NEXT]], i64 6) ; CHECK-UF4-NEXT: [[TMP52]] = call @llvm.vector.extract.nxv2i1.nxv8i1( [[ACTIVE_LANE_MASK_NEXT]], i64 4) @@ -219,6 +218,7 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: [[TMP50]] = call @llvm.vector.extract.nxv2i1.nxv8i1( [[ACTIVE_LANE_MASK_NEXT]], i64 0) ; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement [[TMP50]], i32 0 ; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true +; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] ; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-UF4: middle.block: ; diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index 444c6dd269425..240ea96c2b79f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -24,8 +24,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: [[IV:.*]] = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]] ; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]] ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: CLONE [[IDX:.*]] = load [[GEP_IDX]] @@ -34,7 +35,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]] ; CHECK-NEXT: CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1> ; CHECK-NEXT: CLONE store [[UPDATE]], [[GEP_BUCKET]] -; CHECK-NEXT: EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]] +; CHECK-NEXT: EMIT [[IV_NEXT:.*]] = add nuw [[IV]], [[VFxUF]] ; CHECK-NEXT: EMIT branch-on-count [[IV_NEXT]], [[VTC]] ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -72,8 +73,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: [[IV:.*]] = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]] ; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]] ; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]] ; CHECK-NEXT: [[VECP_IDX:vp.*]] = vector-pointer [[GEP_IDX]] @@ -81,7 +83,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64 ; CHECK-NEXT: WIDEN-GEP Inv[Var] [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]] ; CHECK-NEXT: WIDEN-HISTOGRAM buckets: [[GEP_BUCKET]], inc: ir<1> -; CHECK-NEXT: EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]] +; CHECK-NEXT: EMIT [[IV_NEXT:.*]] = add nuw [[IV]], [[VFxUF]] ; CHECK-NEXT: EMIT branch-on-count [[IV_NEXT]], [[VTC]] ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 871d9be609bd7..e581e23518142 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -556,9 +556,9 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64> ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP9]] ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP10]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll
b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 6da3c77cd35c1..e40971a075392 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -22,8 +22,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> @@ -69,8 +70,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> @@ -121,8 +123,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> @@ -168,8 +171,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> @@ -219,8 +223,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> @@ -266,8 +271,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll index a14ea745c6ac0..0ca5184a77a8c 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -27,10 +27,10 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IDX]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr align 4 [[TMP6]], <4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IDX]], 4 ; CHECK-NEXT: [[NEXT_ACTIVE_LANE_MASK]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[IDX]], i64 [[N2]]) ; CHECK-NEXT: [[EXTRACT_FIRST_LANE_MASK:%.*]] = extractelement <4 x i1> [[NEXT_ACTIVE_LANE_MASK]], i32 0 ; CHECK-NEXT: [[FIRST_LANE_SET:%.*]] = xor i1 [[EXTRACT_FIRST_LANE_MASK]], true +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IDX]], 4 ; CHECK-NEXT: br i1 [[FIRST_LANE_SET]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] @@ -82,11 +82,11 @@ define void @cond_uniform_load(ptr noalias nocapture %dst, ptr nocapture readonl ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX6]] ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[PREDPHI]], ptr align 4 [[TMP7]], <4 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX6]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX6]], i64 [[TMP3]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = xor i1 [[TMP8]], true -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX6]], 4 +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] ; CHECK: [[FOR_END]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index de70da6d2558b..dabefa5eded4c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -157,10 +157,10 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr align 4 [[TMP11]], [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true +; 
DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll index dcb890670e33b..9d72734631db3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll @@ -25,9 +25,9 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP6]], ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -63,13 +63,13 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]] ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr align 8 [[TMP12]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr align 8 [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) -; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]] ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]]) ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE: for.cond.cleanup: ; INTERLEAVE-NEXT: ret void @@ -111,9 +111,9 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP6]], ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: br i1
[[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -149,13 +149,13 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]] ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr align 8 [[TMP12]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]]) ; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr align 8 [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]]) -; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1 ; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]] ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]]) ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]]) ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; INTERLEAVE: for.cond.cleanup: ; INTERLEAVE-NEXT: ret void @@ -227,10 +227,10 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 ; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8 ; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]] ; INTERLEAVE: pred.store.continue4: -; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]] ; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP10]], [[TMP0]] +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 ; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK_NEXT]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE: for.cond.cleanup: ; INTERLEAVE-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll index 49f663f5703b6..c087764f830a1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; REQUIRES: asserts ; RUN: opt -mattr=+neon,+dotprod -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -disable-output %s 2>&1 | FileCheck %s @@ -21,8 +22,9 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, vp<[[REDUCE:%.+]]> (VF scaled by 1/4) ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.a> =
getelementptr ir<%a>, vp<[[STEPS]]> @@ -32,8 +34,8 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) { ; CHECK-NEXT: vp<[[PTR_B:%.+]]> = vector-pointer ir<%gep.b> ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]> ; CHECK-NEXT: EXPRESSION vp<[[REDUCE]]> = ir<[[ACC]]> + partial.reduce.add (mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32)) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT vp<[[INDEX_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[INDEX_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index 5999707699970..84fb0b9384d11 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -20,8 +20,9 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> @@ -67,8 +68,9 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll index bdf832f32964f..f92cbc029ffe4 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll @@ -22,7 +22,6 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: Cost of 1 for VF 2: induction instruction %inc = add nuw nsw i32 %i.016, 1 ; CHECK: Cost of 0 for VF 2: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n -; CHECK: Cost of 0 for VF 2: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 2: vp<{{.+}}> = SCALAR-STEPS vp<{{.+}}>, ir<1> ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<{{.+}}> ; CHECK: Cost of 0 for VF 2: vp<{{.+}}> = vector-pointer ir<%arrayidx> @@ -39,7 +38,6 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: Cost of 1 for VF 4: induction instruction %inc = add nuw nsw i32 %i.016, 1 ; CHECK: Cost of 0 for VF 4: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n -; CHECK: Cost of 0 for VF 4: EMIT vp<{{.+}}> = 
CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 4: vp<{{.+}}> = SCALAR-STEPS vp<{{.+}}>, ir<1> ; CHECK: Cost of 0 for VF 4: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<{{.+}}> ; CHECK: Cost of 0 for VF 4: vp<{{.+}}> = vector-pointer ir<%arrayidx> @@ -56,7 +54,6 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: Cost of 1 for VF 8: induction instruction %inc = add nuw nsw i32 %i.016, 1 ; CHECK: Cost of 0 for VF 8: induction instruction %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] ; CHECK: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i32 %inc, %n -; CHECK: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 8: vp<{{.+}}> = SCALAR-STEPS vp<{{.+}}>, ir<1> ; CHECK: Cost of 0 for VF 8: CLONE ir<%arrayidx> = getelementptr inbounds ir<%s>, vp<{{.+}}> ; CHECK: Cost of 0 for VF 8: vp<{{.+}}> = vector-pointer ir<%arrayidx> @@ -135,8 +132,7 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 2: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 ; CHECK: Cost of 0 for VF 2: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] ; CHECK: Cost of 1 for VF 2: exit condition instruction %cmp.not = icmp eq i32 %dec, 0 -; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 0 for VF 2: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK: Cost of 0 for VF 2: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1> ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]> ; CHECK: Cost of 0 for VF 2: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK: Cost of 0 for VF 2: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]> @@ -167,8 +163,7 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 4: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 ; CHECK: Cost of 0 for VF 4: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] ; CHECK: Cost of 1 for VF 4: exit condition instruction %cmp.not = icmp eq i32 %dec, 0 -; CHECK: Cost of 0 for VF 4: EMIT vp<[[CAN_IV:%.]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 0 for VF 4: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK: Cost of 0 for VF 4: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1> ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]> ; CHECK: Cost of 0 for VF 4: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK: Cost of 0 for VF 4: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]> @@ -199,8 +194,7 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 8: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 ; CHECK: Cost of 0 for VF 8: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] ; CHECK: Cost of 1 for VF 8: exit condition instruction %cmp.not = icmp eq i32 %dec, 0 -; CHECK: Cost of 0 for VF 8: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 0 for VF 8: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK: Cost of 0 for VF 8: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1> ; CHECK: Cost of 0 for VF 8: EMIT 
vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]> ; CHECK: Cost of 0 for VF 8: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK: Cost of 0 for VF 8: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]> @@ -231,8 +225,7 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: Cost of 0 for VF 16: induction instruction %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1 ; CHECK: Cost of 0 for VF 16: induction instruction %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] ; CHECK: Cost of 1 for VF 16: exit condition instruction %cmp.not = icmp eq i32 %dec, 0 -; CHECK: Cost of 0 for VF 16: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> -; CHECK: Cost of 0 for VF 16: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK: Cost of 0 for VF 16: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1> ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep> = ptradd ir<%pSrcA>, vp<[[STEPS1]]> ; CHECK: Cost of 0 for VF 16: vp<[[STEPS2:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK: Cost of 0 for VF 16: EMIT vp<%next.gep>.1 = ptradd ir<%pDst>, vp<[[STEPS2]]> diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 6ea075f76aed4..1b7b95f75ee4a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -347,10 +347,10 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] ; DEFAULT: [[PRED_STORE_CONTINUE35]]: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] ; DEFAULT: [[FOR_COND_CLEANUP]]: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll index a51fe3248f90e..54e5f60f87bbd 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll @@ -21,8 +21,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_INC:%.*]]> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.store @@ -88,8 +89,9 @@ define void @safe_dep(ptr %p) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_INC:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> 
= SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%a1> = getelementptr ir<%p>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VPTR1:%.+]]> = vector-pointer ir<%a1> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll index 46695221c27db..d99113d7b27a6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-riscv-vector-reverse.ll @@ -23,8 +23,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[INDUCTION:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.+]]> ; CHECK-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%.+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ vp<[[OTC]]>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; CHECK-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -39,9 +40,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<[[ARRAY_IDX_A:%.+]]> = getelementptr inbounds ir<[[A:%.+]]>, ir<[[IDX_PROM]]> ; CHECK-NEXT: vp<[[VEC_END_PTR_A:%.+]]> = vector-end-pointer ir<[[ARRAY_IDX_A]]>, vp<[[EVL]]> ; CHECK-NEXT: WIDEN vp.store vp<[[VEC_END_PTR_A]]>, ir<[[ADD_RESULT]]>, vp<[[EVL]]> -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[EVL]]>, vp<[[EVL_PHI]]> -; CHECK-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[EVL]]> -; CHECK-NEXT: EMIT vp<[[INDEX_NEXT]]> = add vp<[[INDUCTION]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[EVL]]>, vp<[[EVL_PHI]]> +; CHECK-NEXT: EMIT vp<[[AVL_NEXT:%.+]]> = sub nuw vp<[[AVL]]>, vp<[[EVL]]> +; CHECK-NEXT: EMIT vp<[[INDEX_NEXT:%.+]]> = add vp<[[INDUCTION]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[INDEX_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll index c05878995f474..a38c4eeb1ab35 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll @@ -17,8 +17,9 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -74,8 +75,9 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ 
vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -131,8 +133,9 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -188,8 +191,9 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -245,8 +249,9 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -297,8 +302,9 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -349,8 +355,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -405,8 +412,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ 
vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -461,8 +469,9 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll index 8d3fe484e6468..dbaf75ad11f3d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll @@ -16,8 +16,9 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -30,7 +31,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -70,8 +71,9 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -84,7 +86,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -122,8 +124,9 @@ define void @vp_trunc(ptr 
%a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -136,7 +139,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -174,8 +177,9 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -188,7 +192,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -226,8 +230,9 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -240,7 +245,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT 
vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -278,8 +283,9 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -292,7 +298,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -330,8 +336,9 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -344,7 +351,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -382,8 +389,9 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -396,7 +404,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; 
IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -434,8 +442,9 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -448,7 +457,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -486,8 +495,9 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -500,7 +510,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> @@ -538,8 +548,9 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[INDEX:%.+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[INDEX:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[INDEX_NEXT:%.+]]> ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[INDEX_EVL:%.+]]> = phi ir<0>, vp<[[INDEX_EVL_NEXT:%.+]]> ; IF-EVL-NEXT: ir<[[IV:%.+]]> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] @@ -553,7 +564,7 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: EMIT-SCALAR vp<[[ZEXT:%.+]]> = zext vp<[[EVL]]> to i64 ; IF-EVL-NEXT: 
EMIT vp<[[INDEX_EVL_NEXT]]> = add vp<[[ZEXT]]>, vp<[[INDEX_EVL]]> ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[ZEXT]]> -; IF-EVL-NEXT: EMIT vp<[[INDEX_NEXT]]> = add vp<[[INDEX]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT vp<[[INDEX_NEXT:%.+]]> = add vp<[[INDEX]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[INDEX_NEXT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors ; IF-EVL-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll index ab4bb90b55b70..20b2198f0c118 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll @@ -22,8 +22,9 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL-EMPTY: ; IF-EVL: vector loop: { +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI:%.+]]> = phi ir<33>, ir<[[LD:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%TC>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] @@ -39,8 +40,8 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> -; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT:%.+]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index dff4971ffdfa1..4c2afc2b12a7c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -36,8 +36,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): vector loop ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: vector loop: { -; IF-EVL-OUTLOOP-NEXT: vector.body: -; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-OUTLOOP-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-OUTLOOP-EMPTY: +; IF-EVL-OUTLOOP-NEXT: vector.body: ; IF-EVL-OUTLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_SELECT:%.+]]> ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] @@ -49,8 +50,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]> ; IF-EVL-OUTLOOP-NEXT: 
WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
 ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[AVL_NEXT:%.+]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-OUTLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
 ; IF-EVL-OUTLOOP-NEXT: No successors
@@ -76,8 +77,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: Successor(s): vector loop
 ; IF-EVL-INLOOP-EMPTY:
 ; IF-EVL-INLOOP-NEXT: vector loop: {
-; IF-EVL-INLOOP-NEXT: vector.body:
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-INLOOP-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV
+; IF-EVL-INLOOP-EMPTY:
+; IF-EVL-INLOOP-NEXT: vector.body:
 ; IF-EVL-INLOOP-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
 ; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
 ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
@@ -88,8 +90,8 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
 ; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
 ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL_NEXT:%.+]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
 ; IF-EVL-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
 ; IF-EVL-INLOOP-NEXT: No successors
@@ -116,8 +118,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-OUTLOOP-NEXT: Successor(s): vector loop
 ; NO-VP-OUTLOOP-EMPTY:
 ; NO-VP-OUTLOOP-NEXT: vector loop: {
-; NO-VP-OUTLOOP-NEXT: vector.body:
-; NO-VP-OUTLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; NO-VP-OUTLOOP-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV
+; NO-VP-OUTLOOP-EMPTY:
+; NO-VP-OUTLOOP-NEXT: vector.body:
 ; NO-VP-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
 ; NO-VP-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; NO-VP-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
@@ -164,8 +167,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-INLOOP-NEXT: Successor(s): vector loop
 ; NO-VP-INLOOP-EMPTY:
 ; NO-VP-INLOOP-NEXT: vector loop: {
-; NO-VP-INLOOP-NEXT: vector.body:
-; NO-VP-INLOOP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; NO-VP-INLOOP-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV
+; NO-VP-INLOOP-EMPTY:
+; NO-VP-INLOOP-NEXT: vector.body:
 ; NO-VP-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
 ; NO-VP-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = 
SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index b3a611eac72fc..b0bf8140a657d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -21,8 +21,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector loop: { -; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: +; IF-EVL-NEXT: vector.body: ; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ] ; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> @@ -38,8 +39,8 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> ; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 -; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> -; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT:%.+]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]> ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> ; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors @@ -55,8 +56,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: Successor(s): vector loop ; NO-VP-EMPTY: ; NO-VP-NEXT: vector loop: { -; NO-VP-NEXT: vector.body: -; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; NO-VP-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; NO-VP-EMPTY: +; NO-VP-NEXT: vector.body: ; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> @@ -104,8 +106,9 @@ define void @safe_dep(ptr %p) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr ir<%p>, vp<[[ST]]> ; CHECK-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll index bb85b88f181f7..d72e33a0355a2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll @@ -10,7 +10,6 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1 ; 
CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 -; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> ; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> @@ -28,7 +27,6 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1 ; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] ; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 -; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> ; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll index 2cda2533e80e0..5a18aed0c0556 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-small-size.ll @@ -32,9 +32,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: Cost of 1 for VF 2: induction instruction %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i32 %lftr.wideiv, %n ; CHECK: Cost of 0 for VF 2: exit condition instruction %lftr.wideiv = trunc i64 %indvars.iv.next to i32 -; CHECK: Cost of 0 for VF 2: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK: Cost of 1 for VF 2: WIDEN-REDUCTION-PHI ir<%sum.013> = phi vp<{{.+}}>, vp<[[EXT:%.+]]> -; CHECK: Cost of 0 for VF 2: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> +; CHECK: Cost of 0 for VF 2: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV:%.+]]>, ir<1> ; CHECK: Cost of 0 for VF 2: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK: Cost of 0 for VF 2: vp<[[VECP1:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK: Cost of 1 for VF 2: WIDEN ir<%0> = load vp<[[VECP1]]> diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll index 123f5b8b03acb..182cb0d949d05 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll @@ -22,8 +22,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL-EMPTY: ; IF-EVL-NEXT: vector loop: { -; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; IF-EVL-EMPTY: +; IF-EVL-NEXT: vector.body: ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; IF-EVL-NEXT: EMIT vp<[[VIV:%[0-9]+]]> = WIDEN-CANONICAL-INDUCTION vp<[[IV]]> ; IF-EVL-NEXT: EMIT vp<[[MASK:%[0-9]+]]> = icmp ule vp<[[VIV]]>, vp<[[BETC]]> @@ -52,8 +53,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, 
i64 %N) { ; NO-VP-NEXT: Successor(s): vector loop ; NO-VP-EMPTY: ; NO-VP-NEXT: vector loop: { -; NO-VP-NEXT: vector.body: -; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; NO-VP-NEXT: vp<[[IV:%[0-9]+]]> = CANONICAL-IV +; NO-VP-EMPTY: +; NO-VP-NEXT: vector.body: ; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]> ; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> ; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 1376a687b38b8..f1078e564a03b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -17,8 +17,9 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<[[FOR1_SPLICE:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> @@ -94,8 +95,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<[[FOR1_SPLICE:%.+]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.3> = phi ir<33>, vp<[[FOR2_SPLICE:%.+]]> @@ -184,8 +186,9 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> @@ -260,8 +263,9 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.x> = phi ir<0>, ir<%for.x.next> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.y> = phi ir<0>, ir<%for.x.prev> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 9deab9063d710..d7ec422f1ac8a 100644 --- 
a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -22,8 +22,9 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -119,8 +120,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> @@ -194,8 +196,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi vp<[[RDX_START]]>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> @@ -257,8 +260,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -362,8 +366,9 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: +; CHECK-NEXT: vector.body: ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]> @@ -440,8 +445,9 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%.pn> = phi ir<0>, ir<[[L:%.+]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV 
ir<2> + vp<[[CAN_IV]]> * ir<1>
 ; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index 43500baba6dfd..ce13643e57a4a 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -50,8 +50,9 @@ for.end:
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
-; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]
 ; CHECK-NEXT: EMIT vp<[[COND:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: WIDEN ir<%cond0> = icmp ult ir<%iv>, ir<13>
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 64caecc847096..fa06534423db2 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -19,8 +19,9 @@
 ; DBG-NEXT: Successor(s): vector loop
 ; DBG-EMPTY:
 ; DBG-NEXT: vector loop: {
+; DBG-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; DBG-EMPTY:
 ; DBG-NEXT: vector.body:
-; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[CAN_IV]]> * ir<1>
 ; DBG-NEXT: vp<[[IV_STEPS:%.]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>, vp<[[VF]]>
 ; DBG-NEXT: CLONE ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>)
@@ -81,8 +82,9 @@ declare i32 @llvm.smin.i32(i32, i32)
 ; DBG-NEXT: Successor(s): vector loop
 ; DBG-EMPTY:
 ; DBG-NEXT: vector loop: {
+; DBG-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; DBG-EMPTY:
 ; DBG-NEXT: vector.body:
-; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<false> + vp<[[CAN_IV]]> * ir<true>
 ; DBG-NEXT: vp<[[STEPS1:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<true>
 ; DBG-NEXT: Successor(s): pred.store
@@ -205,8 +207,9 @@ exit:
 ; DBG-NEXT: Successor(s): vector loop
 ; DBG-EMPTY:
 ; DBG-NEXT: vector loop: {
+; DBG-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; DBG-EMPTY:
 ; DBG-NEXT: vector.body:
-; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
 ; DBG-NEXT: EMIT-SCALAR vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
 ; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, ir<1>, vp<[[VF]]
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index 70b1ea13677b8..69efea36a22b1 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -397,9 +397,9 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
 ; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
 ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
 ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
-; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
-; IC4VF4-NEXT: br i1 true, label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 +; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) ; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]]) @@ -723,9 +723,9 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]] ; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]] ; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]] -; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4) -; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 +; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]]) ; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]]) diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll index 5e88072517b37..fcfffba9041de 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll @@ -63,8 +63,8 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_CONTINUE4]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] @@ -139,8 +139,8 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] @@ -209,8 +209,8 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; 
CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] @@ -285,8 +285,8 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] @@ -358,8 +358,8 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) { ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll index e4322cfcc00ac..51ef32eb4ff95 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll @@ -95,8 +95,8 @@ define i32 @chained_smax(i32 %x, ptr %src) { ; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]]) diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll index 61ed49296070e..e3a3224c77bff 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -22,8 +22,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]] ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> @@ -109,8 +110,9 @@ define i64 
@multi_exiting_to_same_exit_live_in_exit_values() { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> @@ -189,8 +191,9 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index 528f2448616e8..01d4581d404a2 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -39,8 +39,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: label="\ vector loop" ; CHECK-NEXT: N4 [label = ; CHECK-NEXT: "vector.body:\l" + -; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION ir\<0\>, vp\<[[CAN_IV_NEXT:%.+]]\>\l" + -; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>, vp\<[[VF]]\>\l" + +; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV:%.+]]\>, ir\<1\>, vp\<[[VF]]\>\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr inbounds ir\<%y\>, vp\<[[STEPS]]\>\l" + ; CHECK-NEXT: " vp\<[[VEC_PTR:%.+]]\> = vector-pointer ir\<%arrayidx\>\l" + ; CHECK-NEXT: " WIDEN ir\<%lv\> = load vp\<[[VEC_PTR]]\>\l" + @@ -48,7 +47,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: " CLONE ir\<%arrayidx2\> = getelementptr inbounds ir\<%x\>, vp\<[[STEPS]]\>\l" + ; CHECK-NEXT: " vp\<[[VEC_PTR2:%.+]]\> = vector-pointer ir\<%arrayidx2\>\l" + ; CHECK-NEXT: " WIDEN store vp\<[[VEC_PTR2]]\>, ir\<%call\>\l" + -; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT]]\> = add nuw vp\<[[CAN_IV]]\>, vp\<[[VFxUF]]\>\l" + +; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT:%.+]]\> = add nuw vp\<[[CAN_IV]]\>, vp\<[[VFxUF]]\>\l" + ; CHECK-NEXT: " EMIT branch-on-count vp\<[[CAN_IV_NEXT]]\>, vp\<[[VEC_TC]]\>\l" + ; CHECK-NEXT: "No successors\l" ; CHECK-NEXT: ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 0892500da90e3..d556ef7972db7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -16,8 +16,9 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; 
CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%dst>, vp<[[STEPS:%.+]]> @@ -72,8 +73,9 @@ define void @iv_expand(ptr %p, i64 %n) { ; CHECK-LABEL: LV: Checking a loop in 'iv_expand' ; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' { ; CHECK: vector loop: { +; CHECK-NEXT: vp<{{.+}}> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> ; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> ; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 37cb1d2331d48..a64a0f3caf6e7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -23,8 +23,9 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<[[PADD:%.+]]> = ptradd ir<%A>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VPTR:%.]]> = vector-pointer vp<[[PADD]]> @@ -32,7 +33,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10> ; CHECK-NEXT: vp<[[VPTR2:%.+]]> = vector-pointer vp<[[PADD]]> ; CHECK-NEXT: WIDEN store vp<[[VPTR2]]>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV:%.+]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index ef678ff759943..bbdc7e438522e 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -20,15 +20,16 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<[[RDX_START]]>, ir<%red.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -88,15 +89,16 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: 
{ +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<[[RDX_START]]>, ir<%red.next> ; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -158,8 +160,9 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi vp<[[RDX_START]]>, ir<%muladd> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> @@ -170,7 +173,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: WIDEN ir<%l.b> = load vp<[[VEC_PTR2]]> ; CHECK-NEXT: EMIT vp<[[FMUL:%.+]]> = fmul nnan ninf nsz ir<%l.a>, ir<%l.b> ; CHECK-NEXT: REDUCE ir<[[MULADD:%.+]]> = ir<%sum.07> + nnan ninf nsz reduce.fadd (vp<[[FMUL]]>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -197,7 +200,6 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-NEXT:} - entry: br label %loop @@ -221,8 +223,9 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) { ; CHECK-LABEL: Checking a loop in 'find_last_iv' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<{{.+}}> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%rdx> = phi ir<-9223372036854775808>, ir<%cond> ; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -282,15 +285,16 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN 
ir<[[LOAD:%.+]]> = load vp<[[ADDR]]> ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> zext to i64) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -327,8 +331,9 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> @@ -338,7 +343,7 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i ; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]> ; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]> ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -377,8 +382,9 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> @@ -388,7 +394,7 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado ; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]> ; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]> ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD1]]> sext to i64)) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -430,15 +436,16 @@ define i64 @print_extended_sub_reduction(ptr nocapture readonly %x, ptr nocaptur ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<[[LOAD:%.+]]> = load vp<[[ADDR]]> ; CHECK-NEXT: 
EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.sub (ir<[[LOAD]]> zext to i64) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -478,8 +485,9 @@ define i32 @print_mulacc_sub(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<%4> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%3>, vp<%8> ; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%5> @@ -596,8 +604,9 @@ define i32 @print_mulacc_negated(ptr %a, ptr %b) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<%4> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%3>, vp<%8> ; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0> ; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%5> @@ -714,8 +723,9 @@ define i64 @print_mulacc_sub_extended(ptr nocapture readonly %x, ptr nocapture r ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> @@ -725,7 +735,7 @@ define i64 @print_mulacc_sub_extended(ptr nocapture readonly %x, ptr nocapture r ; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]> ; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]> ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.sub (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD1]]> sext to i64)) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -767,15 +777,16 @@ define i64 @print_mulacc_duplicate_extends(ptr nocapture readonly %x, ptr nocapt ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]> ; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]> ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.sub (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD0]]> sext to i64)) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw 
vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -819,15 +830,16 @@ define i32 @print_mulacc_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32)) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -873,8 +885,9 @@ define i32 @print_mulacc_extended_const_lhs(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> @@ -882,7 +895,7 @@ define i32 @print_mulacc_extended_const_lhs(ptr %start, ptr %end) { ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32 ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul ir<63>, ir<%l.ext>) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -929,8 +942,9 @@ define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV +; CHECK-EMPTY: ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]> ; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]> @@ -938,7 +952,7 @@ define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) { ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32 ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul ir<%l.ext>, ir<128>) -; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -985,15 +999,16 @@ 
define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<%next.gep>
 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64))
-; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -1041,8 +1056,9 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<[[STEPS]]>
@@ -1051,7 +1067,7 @@ define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
 ; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
 ; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<%mul> sext to i64)
-; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 91e0037d12c61..3a7705a61c560 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -21,8 +21,9 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -31,7 +32,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
 ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%call>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -89,8 +90,9 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr inbounds ir<%y>, ir<%iv>
@@ -102,7 +104,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
 ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx2>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -163,8 +165,9 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%i> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%i>, ir<5>
@@ -191,7 +194,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
 ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -258,8 +261,9 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<4>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<4>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr inbounds ir<@AB>, ir<0>, vp<[[STEPS]]>
@@ -274,7 +278,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
 ; CHECK-NEXT: store ir<1> to index 1
 ; CHECK-NEXT: store ir<2> to index 2
 ; CHECK-NEXT: store ir<%AB.3> to index 3
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -344,8 +348,9 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%isd> = getelementptr inbounds ir<%asd>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%isd>
@@ -377,7 +382,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
 ; CHECK-NEXT: BLEND ir<%ysd.0> = ir<%psd> vp<[[PHI]]>/vp<[[OR1]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT:}
@@ -454,15 +459,16 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]> (truncated to i8)
 ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]>
 ; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%iv>, ir<1>
 ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
 ; CHECK-NEXT: REPLICATE store ir<%v3>, ir<%gep>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -521,15 +527,16 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
 ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<0>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -587,8 +594,9 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.y> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.y>
@@ -599,7 +607,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%div>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -658,8 +666,9 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
@@ -669,7 +678,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%gep.x>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -727,8 +736,9 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%ld.addr> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%ld.addr>
@@ -759,7 +769,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
 ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -827,8 +837,9 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
@@ -838,7 +849,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%or.1>, ir<%or.2>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.x>
 ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -896,8 +907,9 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%p>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
@@ -905,7 +917,7 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
 ; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l>
 ; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-element ir<%zext>
 ; CHECK-NEXT: CLONE store vp<[[EXT]]>, ir<%p1>
-; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>
+; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
@@ -942,8 +954,9 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]>
@@ -1015,8 +1028,9 @@ define void @print_select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, pt
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK: vector loop: {
+; CHECK-NEXT: vp<[[IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT_EXIT:%.+]]>
 ; CHECK-NEXT: vp<[[ST:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%b>, vp<[[ST]]>
 ; CHECK-NEXT: vp<[[PTR1:%.+]]> = vector-pointer ir<[[GEP1]]>
@@ -1030,7 +1044,7 @@ define void @print_select_with_fastmath_flags(ptr noalias %a, ptr noalias %b, pt
 ; CHECK-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds nuw ir<%a>, vp<[[ST]]>
 ; CHECK-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
 ; CHECK-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[SELECT]]>
-; CHECK-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
+; CHECK-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
index 9550454b1a3c2..10f9f7f1b7538 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -21,8 +21,9 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, vp<[[STEPS]]>
 ; CHECK-NEXT: CLONE ir<%tmp3> = load ir<%tmp2>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 994e9c1ce64fa..ce84e4636f7ec 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -25,8 +25,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): pred.store
@@ -94,8 +95,9 @@ exit:
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): pred.load
@@ -179,8 +181,9 @@ exit:
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): pred.load
@@ -265,8 +268,9 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<21>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1>
 ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
@@ -339,8 +343,9 @@ define void @pred_cfg1(i32 %k, i32 %j) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ult ir<%iv>, ir<%j>
@@ -437,8 +442,9 @@ define void @pred_cfg2(i32 %k, i32 %j) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
@@ -544,8 +550,9 @@ define void @pred_cfg3(i32 %k, i32 %j) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
@@ -651,8 +658,9 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
@@ -757,8 +765,9 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): pred.store
@@ -823,8 +832,9 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[PRED:%.+]]>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
@@ -906,8 +916,9 @@ define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: Successor(s): pred.store
 ; CHECK-EMPTY:
 ; CHECK-NEXT: pred.store: {
@@ -974,8 +985,9 @@ define void @sinking_requires_duplication(ptr %addr) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
@@ -1049,8 +1061,9 @@ define void @merge_with_dead_gep_between_regions(i32 %n, i32 %k, ptr noalias %sr
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<%n>, ir<-1>, vp<[[VF]]>
 ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
 ; CHECK-NEXT: WIDEN ir<%cond> = icmp ult ir<%iv>, ir<%k>
@@ -1142,8 +1155,9 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<-1>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
 ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll
index ef782b3b221bc..efd4ffb66ae91 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll
@@ -20,8 +20,9 @@ define void @test_unused_interleave(ptr %src, i32 %length) {
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<%2> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
 ; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%1>
 ; CHECK-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll
index f948906d4d0b8..bf6fd6f837d30 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll
@@ -16,8 +16,9 @@ define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%in>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[IN_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
@@ -72,8 +73,9 @@ define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly
 ; CHECK-NEXT: Successor(s): vector loop
 ; CHECK-EMPTY:
 ; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vp<[[CAN_IV:%.+]]> = CANONICAL-IV
+; CHECK-EMPTY:
 ; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
 ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]
 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%in>, vp<[[STEPS]]>
 ; CHECK-NEXT: vp<[[IN_VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index b99d656c5c50f..949df7856f41b 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -57,9 +57,7 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
   EXPECT_EQ(&*Plan, VecBB->getPlan());
 
   auto Iter = VecBB->begin();
-  auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++);
-  EXPECT_NE(nullptr, CanIV);
-  auto *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
+  VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
   EXPECT_NE(nullptr, Phi);
 
   VPInstruction *Idx = dyn_cast<VPInstruction>(&*Iter++);
@@ -218,7 +216,6 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
   EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
 
   auto Iter = VecBB->begin();
-  EXPECT_NE(nullptr, dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast(&*Iter++));
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp
index 582094bed3ef7..817af82bdbf43 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp
@@ -23,32 +23,31 @@ using VPPatternMatchTest = VPlanTestBase;
 TEST_F(VPPatternMatchTest, ScalarIVSteps) {
   VPlan &Plan = getPlan();
+  IntegerType *I64Ty = IntegerType::get(C, 64);
+  VPRegionBlock *VPR =
+      Plan.createVPRegionBlock(DebugLoc::getCompilerGenerated(), "");
+  VPValue *CanIV = VPR->getCanonicalIV();
   VPBasicBlock *VPBB = Plan.createVPBasicBlock("");
   VPBuilder Builder(VPBB);
-  IntegerType *I64Ty = IntegerType::get(C, 64);
-  VPValue *StartV = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
-  auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DebugLoc());
-  Builder.insert(CanonicalIVPHI);
-
   VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
   VPValue *VF = &Plan.getVF();
-  VPValue *Steps = Builder.createScalarIVSteps(
-      Instruction::Add, nullptr, CanonicalIVPHI, Inc, VF, DebugLoc());
+  VPValue *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, CanIV,
+                                               Inc, VF, DebugLoc());
   VPValue *Inc2 = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 2));
-  VPValue *Steps2 = Builder.createScalarIVSteps(
-      Instruction::Add, nullptr, CanonicalIVPHI, Inc2, VF, DebugLoc());
+  VPValue *Steps2 = Builder.createScalarIVSteps(Instruction::Add, nullptr,
+                                                CanIV, Inc2, VF, DebugLoc());
 
   using namespace VPlanPatternMatch;
-  ASSERT_TRUE(match(Steps, m_ScalarIVSteps(m_Specific(CanonicalIVPHI),
-                                           m_SpecificInt(1), m_Specific(VF))));
+  ASSERT_TRUE(match(Steps, m_ScalarIVSteps(m_Specific(CanIV), m_SpecificInt(1),
+                                           m_Specific(VF))));
   ASSERT_FALSE(
-      match(Steps2, m_ScalarIVSteps(m_Specific(CanonicalIVPHI),
-                                    m_SpecificInt(1), m_Specific(VF))));
-  ASSERT_TRUE(match(Steps2, m_ScalarIVSteps(m_Specific(CanonicalIVPHI),
-                                            m_SpecificInt(2), m_Specific(VF))));
+      match(Steps2, m_ScalarIVSteps(m_Specific(CanIV), m_SpecificInt(1),
+                                    m_Specific(VF))));
+  ASSERT_TRUE(match(Steps2, m_ScalarIVSteps(m_Specific(CanIV), m_SpecificInt(2),
+                                            m_Specific(VF))));
 }
 
 TEST_F(VPPatternMatchTest, GetElementPtr) {
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index 50ad4d5fa61ff..9abd49fdabc5d 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -24,7 +24,6 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
   VPInstruction *UseI = new VPInstruction(Instruction::Sub, {DefI});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
 
   VPBasicBlock *VPBB1 = Plan.getEntry();
   VPBB1->appendRecipe(UseI);
@@ -59,15 +58,13 @@ TEST_F(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
   VPInstruction *UseI = new VPInstruction(Instruction::Sub, {DefI});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {UseI});
 
   VPBasicBlock *VPBB1 = Plan.getEntry();
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
 
   VPBB1->appendRecipe(UseI);
-  VPBB2->appendRecipe(CanIV);
   VPBB2->appendRecipe(DefI);
   VPBB2->appendRecipe(BranchOnCond);
@@ -100,9 +97,8 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 0));
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {DefI});
   auto *Blend = new VPBlendRecipe(Phi, {DefI}, {});
 
   VPBasicBlock *VPBB1 = Plan.getEntry();
@@ -110,7 +106,6 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
   VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("");
   VPBasicBlock *VPBB4 = Plan.createVPBasicBlock("");
 
-  VPBB2->appendRecipe(CanIV);
   VPBB3->appendRecipe(Blend);
   VPBB4->appendRecipe(DefI);
   VPBB4->appendRecipe(BranchOnCond);
@@ -157,8 +152,6 @@ TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) {
   VPPhi *Phi = new VPPhi({DefI}, {});
   VPBB2->appendRecipe(Phi);
   VPBB2->appendRecipe(DefI);
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  VPBB3->appendRecipe(CanIV);
 
   VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB3, VPBB3);
   VPBlockUtils::connectBlocks(VPBB1, VPBB2);
@@ -186,9 +179,8 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
   VPlan &Plan = getPlan();
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *I1 = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {I1});
   VPInstruction *BranchOnCond2 =
       new VPInstruction(VPInstruction::BranchOnCond, {I1});
@@ -197,7 +189,6 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
 
   VPBB1->appendRecipe(I1);
   VPBB1->appendRecipe(BranchOnCond2);
-  VPBB2->appendRecipe(CanIV);
   VPBB2->appendRecipe(BranchOnCond);
 
   VPRegionBlock *R1 = Plan.createLoopRegion("R1", VPBB2, VPBB2);
@@ -220,9 +211,8 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
   VPlan &Plan = getPlan();
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
   VPInstruction *I1 = new VPInstruction(Instruction::Add, {Zero});
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
   VPInstruction *BranchOnCond =
-      new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+      new VPInstruction(VPInstruction::BranchOnCond, {I1});
   VPInstruction *BranchOnCond2 =
       new VPInstruction(VPInstruction::BranchOnCond, {I1});
@@ -231,7 +221,6 @@ TEST_F(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
   VPBasicBlock *VPBB3 = Plan.createVPBasicBlock("");
 
   VPBB1->appendRecipe(I1);
-  VPBB2->appendRecipe(CanIV);
   VPBB2->appendRecipe(BranchOnCond2);
   VPBB3->appendRecipe(BranchOnCond);
@@ -260,8 +249,6 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) {
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("");
 
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  VPBB2->appendRecipe(CanIV);
 
   VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero});
   VPInstruction *BranchOnCond =
@@ -289,14 +276,11 @@ TEST_F(VPVerifierTest, BlockOutsideRegionWithParent) {
 TEST_F(VPVerifierTest, NonHeaderPHIInHeader) {
   VPlan &Plan = getPlan();
   VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0));
-  auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {});
-  auto *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {CanIV});
+  auto *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {Zero});
 
   VPBasicBlock *VPBB1 = Plan.getEntry();
   VPBasicBlock *VPBB2 = Plan.createVPBasicBlock("header");
-  VPBB2->appendRecipe(CanIV);
-
   PHINode *PHINode = PHINode::Create(Type::getInt32Ty(C), 2);
   auto *IRPhi = new VPIRPhi(*PHINode);
   VPBB2->appendRecipe(IRPhi);