From e58cb9607323e5995f4e889d07b7e62e31bc8404 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 11 Oct 2024 12:28:30 +0100 Subject: [PATCH 01/17] [VPlan] Compute induction end values in VPlan. Use createDerivedIV to compute IV end values directly in VPlan, instead of creating them up-front. This allows updating IV users outside the loop as follow-up. Depends on https://github.com/llvm/llvm-project/pull/110004 and https://github.com/llvm/llvm-project/pull/109975. --- .../Vectorize/LoopVectorizationPlanner.h | 4 +- .../Transforms/Vectorize/LoopVectorize.cpp | 234 ++++++++++-------- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 + .../AArch64/clamped-trip-count.ll | 4 +- .../AArch64/conditional-branches-cost.ll | 4 +- .../AArch64/fixed-order-recurrence.ll | 4 +- .../AArch64/induction-costs-sve.ll | 2 +- .../LoopVectorize/AArch64/induction-costs.ll | 4 +- .../AArch64/low_trip_count_predicates.ll | 12 +- .../AArch64/mul-simplification.ll | 2 +- .../AArch64/reduction-recurrence-costs-sve.ll | 8 +- .../AArch64/scalable-avoid-scalarization.ll | 2 +- .../AArch64/scalable-strict-fadd.ll | 12 +- .../LoopVectorize/AArch64/sve-epilog-vect.ll | 2 +- .../AArch64/sve-inductions-unusual-types.ll | 4 +- .../AArch64/sve-interleaved-accesses.ll | 16 +- .../AArch64/sve-live-out-pointer-induction.ll | 4 +- .../LoopVectorize/AArch64/sve-tail-folding.ll | 2 +- .../LoopVectorize/AArch64/sve-widen-phi.ll | 12 +- .../LoopVectorize/PowerPC/exit-branch-cost.ll | 60 ++--- .../RISCV/blend-any-of-reduction-cost.ll | 2 +- .../RISCV/blocks-with-dead-instructions.ll | 14 +- .../LoopVectorize/RISCV/dead-ops-cost.ll | 32 +-- .../first-order-recurrence-scalable-vf1.ll | 2 +- .../LoopVectorize/RISCV/induction-costs.ll | 8 +- .../RISCV/masked_gather_scatter.ll | 4 +- .../LoopVectorize/RISCV/strided-accesses.ll | 16 +- ...-force-tail-with-evl-reverse-load-store.ll | 8 +- ...orize-force-tail-with-evl-uniform-store.ll | 2 +- .../predicated-first-order-recurrence.ll | 4 +- .../LoopVectorize/X86/cost-model.ll | 8 +- .../X86/fixed-order-recurrence.ll | 4 +- .../LoopVectorize/X86/induction-costs.ll | 2 +- .../Transforms/LoopVectorize/X86/optsize.ll | 4 +- .../X86/pr109581-unused-blend.ll | 2 +- .../Transforms/LoopVectorize/X86/pr72969.ll | 4 +- .../LoopVectorize/X86/scatter_crash.ll | 44 ++-- .../LoopVectorize/X86/small-size.ll | 58 ++--- .../LoopVectorize/branch-weights.ll | 2 +- .../epilog-vectorization-any-of-reductions.ll | 8 +- .../first-order-recurrence-chains.ll | 2 +- .../first-order-recurrence-complex.ll | 34 +-- .../LoopVectorize/first-order-recurrence.ll | 72 +++--- .../Transforms/LoopVectorize/induction.ll | 52 ++-- .../LoopVectorize/interleaved-accesses.ll | 4 +- .../invariant-store-vectorization.ll | 2 +- .../pr59319-loop-access-info-invalidation.ll | 34 +-- llvm/test/Transforms/LoopVectorize/pr66616.ll | 2 +- .../LoopVectorize/reduction-align.ll | 2 +- .../LoopVectorize/reduction-inloop-cond.ll | 8 +- .../LoopVectorize/scalable-inductions.ll | 8 +- .../LoopVectorize/select-reduction.ll | 4 +- ...e-reduction-results-in-tail-folded-loop.ll | 2 +- .../LoopVectorize/vplan-printing.ll | 47 ++-- 54 files changed, 469 insertions(+), 425 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index fbcf181a45a66..0298ab523307d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -233,8 +233,8 @@ class VPBuilder { VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, - VPValue *Step, const Twine &Name = "") { + VPValue *CanonicalIV, VPValue *Step, + const Twine &Name = "") { return tryInsertInstruction( new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name)); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4a4553e4a8db8..1ee596502f9d4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -513,21 +513,14 @@ class InnerLoopVectorizer { /// Fix the non-induction PHIs in \p Plan. void fixNonInductionPHIs(VPTransformState &State); - /// Create a ResumePHI VPInstruction for the induction \p InductionPhiIRI to - /// resume iteration count in the scalar epilogue from where the vectorized - /// loop left off, and add it to the scalar preheader of VPlan. Also creates - /// the induction resume value, and the value for the bypass block, if needed. - /// \p Step is the SCEV-expanded induction step to use. In cases where the - /// loop skeleton is more complicated (i.e., epilogue vectorization) and the - /// resume values can come from an additional bypass block, - /// \p MainVectorTripCount provides the trip count of the main vector loop, - /// used to compute the resume value reaching the scalar loop preheader - /// directly from this additional bypass block. - void createInductionResumeVPValue(VPIRInstruction *InductionPhiIRI, - const InductionDescriptor &ID, Value *Step, - ArrayRef BypassBlocks, - VPBuilder &ScalarPHBuilder, - Value *MainVectorTripCount = nullptr); + /// Create the bypass resume value coming from the additional bypass block. \p + /// Step is the SCEV-expanded induction step to use. \p MainVectorTripCount + /// provides the trip count of the main vector loop, used to compute the + /// resume value reaching the scalar loop preheader directly from this + /// additional bypass block. + void createInductionBypassValue(PHINode *OrigPhi, + const InductionDescriptor &ID, Value *Step, + Value *MainVectorTripCount); /// Returns the original loop trip count. Value *getTripCount() const { return TripCount; } @@ -584,17 +577,10 @@ class InnerLoopVectorizer { /// vector loop preheader, middle block and scalar preheader. void createVectorLoopSkeleton(StringRef Prefix); - /// Create new phi nodes for the induction variables to resume iteration count - /// in the scalar epilogue, from where the vectorized loop left off. - /// In cases where the loop skeleton is more complicated (i.e. epilogue - /// vectorization), \p MainVectorTripCount provides the trip count of the main - /// loop, used to compute these resume values. If \p IVSubset is provided, it - /// contains the phi nodes for which resume values are needed, because they - /// will generate wide induction phis in the epilogue loop. - void - createInductionResumeVPValues(const SCEV2ValueTy &ExpandedSCEVs, - Value *MainVectorTripCount = nullptr, - SmallPtrSetImpl *IVSubset = nullptr); + /// Create values for the induction variables to resume iteration count + /// in the bypass block. + void createInductionBypassValues(const SCEV2ValueTy &ExpandedSCEVs, + Value *MainVectorTripCount); /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. @@ -2613,21 +2599,11 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { nullptr, Twine(Prefix) + "scalar.ph"); } -void InnerLoopVectorizer::createInductionResumeVPValue( - VPIRInstruction *InductionPhiRI, const InductionDescriptor &II, Value *Step, - ArrayRef BypassBlocks, VPBuilder &ScalarPHBuilder, +void InnerLoopVectorizer::createInductionBypassValue( + PHINode *OrigPhi, const InductionDescriptor &II, Value *Step, Value *MainVectorTripCount) { - // TODO: Move to LVP or general VPlan construction, once no IR values are - // generated. - auto *OrigPhi = cast(&InductionPhiRI->getInstruction()); - Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); - assert(VectorTripCount && "Expected valid arguments"); - Instruction *OldInduction = Legal->getPrimaryInduction(); - // For the primary induction the end values are known. - Value *EndValue = VectorTripCount; Value *EndValueFromAdditionalBypass = MainVectorTripCount; - // Otherwise compute them accordingly. if (OrigPhi != OldInduction) { IRBuilder<> B(LoopVectorPreHeader->getTerminator()); @@ -2635,10 +2611,6 @@ void InnerLoopVectorizer::createInductionResumeVPValue( if (isa_and_nonnull(II.getInductionBinOp())) B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); - EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(), - Step, II.getKind(), II.getInductionBinOp()); - EndValue->setName("ind.end"); - // Compute the end value for the additional bypass (if applicable). if (MainVectorTripCount) { B.SetInsertPoint(getAdditionalBypassBlock(), @@ -2650,22 +2622,12 @@ void InnerLoopVectorizer::createInductionResumeVPValue( } } - auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, - {Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())}, - OrigPhi->getDebugLoc(), "bc.resume.val"); - assert(InductionPhiRI->getNumOperands() == 0 && - "InductionPhiRI should not have any operands"); - InductionPhiRI->addOperand(ResumePhiRecipe); - - if (EndValueFromAdditionalBypass) { - // Store the bypass value here, as it needs to be added as operand to its - // scalar preheader phi node after the epilogue skeleton has been created. - // TODO: Directly add as extra operand to the VPResumePHI recipe. - assert(!Induction2AdditionalBypassValue.contains(OrigPhi) && - "entry for OrigPhi already exits"); - Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass; - } + // Store the bypass value here, as it needs to be added as operand to its + // scalar preheader phi node after the epilogue skeleton has been created. + // TODO: Directly add as extra operand to the VPResumePHI recipe. + assert(!Induction2AdditionalBypassValue.contains(OrigPhi) && + "entry for OrigPhi already exits"); + Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass; } /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV @@ -2682,29 +2644,15 @@ static Value *getExpandedStep(const InductionDescriptor &ID, return I->second; } -void InnerLoopVectorizer::createInductionResumeVPValues( - const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount, - SmallPtrSetImpl *IVSubset) { - // We are going to resume the execution of the scalar loop. - // Go over all of the induction variable PHIs of the scalar loop header and - // fix their starting values, which depend on the counter of the last - // iteration of the vectorized loop. If we come from one of the - // LoopBypassBlocks then we need to start from the original start value. - // Otherwise we provide the trip count from the main vector loop. - VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader(); - VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin()); - for (VPRecipeBase &R : *Plan.getScalarHeader()) { - auto *PhiR = cast(&R); - auto *Phi = dyn_cast(&PhiR->getInstruction()); - if (!Phi) - break; - if (!Legal->getInductionVars().contains(Phi) || - (IVSubset && !IVSubset->contains(Phi))) - continue; - const InductionDescriptor &II = Legal->getInductionVars().find(Phi)->second; - createInductionResumeVPValue(PhiR, II, getExpandedStep(II, ExpandedSCEVs), - LoopBypassBlocks, ScalarPHBuilder, - MainVectorTripCount); +void InnerLoopVectorizer::createInductionBypassValues( + const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) { + assert(MainVectorTripCount && "Must have bypass information"); + + for (const auto &InductionEntry : Legal->getInductionVars()) { + PHINode *OrigPhi = InductionEntry.first; + const InductionDescriptor &II = InductionEntry.second; + createInductionBypassValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), + MainVectorTripCount); } } @@ -2766,8 +2714,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( // faster. emitMemRuntimeChecks(LoopScalarPreHeader); - // Emit phis for the new starting index of the scalar loop. - createInductionResumeVPValues(ExpandedSCEVs); + Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); + assert(VectorTripCount && "Expected valid arguments"); return {LoopVectorPreHeader, nullptr}; } @@ -7848,19 +7796,6 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton( // Generate the induction variable. EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); - // Generate VPValues and ResumePhi recipes for wide inductions in the epilogue - // plan only. Other inductions only need a resume value for the canonical - // induction, which will get created during epilogue skeleton construction. - SmallPtrSet WideIVs; - for (VPRecipeBase &H : - EPI.EpiloguePlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (auto *WideIV = dyn_cast(&H)) - WideIVs.insert(WideIV->getPHINode()); - else if (auto *PtrIV = dyn_cast(&H)) - WideIVs.insert(cast(PtrIV->getUnderlyingValue())); - } - createInductionResumeVPValues(ExpandedSCEVs, nullptr, &WideIVs); - return {LoopVectorPreHeader, nullptr}; } @@ -8049,13 +7984,14 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( EPResumeVal->addIncoming(Init, EPI.MainLoopIterationCountCheck); } - // Generate induction resume values. These variables save the new starting - // indexes for the scalar loop. They are used to test if there are any tail - // iterations left once the vector loop has completed. - // Note that when the vectorized epilogue is skipped due to iteration count - // check, then the resume value for the induction variable comes from - // the trip count of the main vector loop, passed as the second argument. - createInductionResumeVPValues(ExpandedSCEVs, EPI.VectorTripCount); + Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); + assert(VectorTripCount && "Expected valid arguments"); + + // Generate bypass values from the bypass blocks. Note that when the + // vectorized epilogue is skipped due to iteration count check, then the + // resume value for the induction variable comes from the trip count of the + // main vector loop, passed as the second argument. + createInductionBypassValues(ExpandedSCEVs, EPI.VectorTripCount); return {LoopVectorPreHeader, EPResumeVal}; } @@ -8858,13 +8794,64 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } +static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, + VPBuilder &Builder, + VPBuilder &ScalarPHBuilder, + VPTypeAnalysis &TypeInfo, + VPValue *VectorTC) { + PHINode *OrigPhi; + const InductionDescriptor *ID; + VPValue *Start; + VPValue *Step; + Type *ScalarTy; + bool IsCanonical = false; + if (auto *WideIV = dyn_cast(PhiR)) { + if (WideIV->getTruncInst()) + return nullptr; + OrigPhi = cast(WideIV->getUnderlyingValue()); + ID = &WideIV->getInductionDescriptor(); + Start = WideIV->getStartValue(); + Step = WideIV->getStepValue(); + ScalarTy = WideIV->getScalarType(); + IsCanonical = WideIV->isCanonical(); + } else if (auto *WideIV = dyn_cast(PhiR)) { + OrigPhi = cast(WideIV->getUnderlyingValue()); + ID = &WideIV->getInductionDescriptor(); + Start = WideIV->getStartValue(); + Step = WideIV->getOperand(1); + ScalarTy = Start->getLiveInIRValue()->getType(); + } else { + return nullptr; + } + + VPValue *EndValue = VectorTC; + if (!IsCanonical) { + EndValue = Builder.createDerivedIV( + ID->getKind(), + dyn_cast_or_null(ID->getInductionBinOp()), Start, + VectorTC, Step); + } + + if (ScalarTy != TypeInfo.inferScalarType(EndValue)) { + EndValue = Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy); + } + + auto *ResumePhiRecipe = + ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start}, + OrigPhi->getDebugLoc(), "bc.resume.val"); + return ResumePhiRecipe; +} + /// Create resume phis in the scalar preheader for first-order recurrences and /// reductions and update the VPIRInstructions wrapping the original phis in the /// scalar header. static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { + VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); auto *ScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); VPBuilder ScalarPHBuilder(ScalarPH); + VPBuilder VectorPHBuilder( + cast(Plan.getVectorLoopRegion()->getSinglePredecessor())); VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); VPValue *OneVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); @@ -8874,6 +8861,13 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { if (!ScalarPhiI) break; auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); + + if (VPValue *ResumePhi = addResumeValuesForInduction( + VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, + &Plan.getVectorTripCount())) { + ScalarPhiIRI->addOperand(ResumePhi); + continue; + } if (!isa(VectorPhiR)) continue; // The backedge value provides the value to resume coming out of a loop, @@ -9635,7 +9629,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); DerivedIV->setName(Name); - assert(DerivedIV != CanonicalIV && "IV didn't need transforming?"); State.set(this, DerivedIV, VPLane(0)); } @@ -10299,6 +10292,39 @@ bool LoopVectorizePass::processLoop(Loop *L) { EPI, &LVL, &CM, BFI, PSI, Checks, *BestMainPlan); + // Collect PHI nodes of wide inductions in the VPlan for the epilogue. + // Those will need their resume-values computed from the main vector + // loop. Others can be removed in the main VPlan. + SmallPtrSet WidenedPhis; + for (VPRecipeBase &R : + BestEpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + if (!isa(&R)) + continue; + if (isa(&R)) + WidenedPhis.insert( + cast(&R)->getPHINode()); + else + WidenedPhis.insert( + cast(R.getVPSingleValue()->getUnderlyingValue())); + } + for (VPRecipeBase &R : + *cast(BestMainPlan->getScalarHeader())) { + auto *VPIRInst = cast(&R); + auto *IRI = dyn_cast(&VPIRInst->getInstruction()); + if (!IRI) + break; + if (WidenedPhis.contains(IRI) || + !LVL.getInductionVars().contains(IRI)) + continue; + VPRecipeBase *ResumePhi = + VPIRInst->getOperand(0)->getDefiningRecipe(); + VPIRInst->setOperand(0, BestMainPlan->getOrAddLiveIn( + Constant::getNullValue(IRI->getType()))); + ResumePhi->eraseFromParent(); + } + VPlanTransforms::removeDeadRecipes(*BestMainPlan); + auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b438700a3fe2c..9e3567d04d233 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -64,6 +64,7 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::LogicalAnd: case VPInstruction::PtrAdd: + case VPInstruction::ResumePhi: return false; default: return true; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 1948211858d44..f1191fab8350c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -13,9 +13,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer @@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 6956030570636..754b86ab2fb87 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -821,11 +821,11 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]] ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8 ; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] ; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2 -; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll index f7a1eb455fc1a..a939969af852e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll @@ -48,8 +48,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4bb67c890f3cf..3d045de0ff238 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -821,8 +821,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) { ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1 ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2 ; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 ; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1 +; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 ; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer ; PRED-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 4d070a4d02c90..fc0d649d0b433 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -333,10 +333,10 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[P:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index a02323ab2108f..27cf432f0b45c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -105,9 +105,9 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 8 ; CHECK-VS1-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]] ; CHECK-VS1-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] -; CHECK-VS1-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC3]] ; CHECK-VS1-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS1-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 8 +; CHECK-VS1-NEXT: [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS1-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; CHECK-VS1-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -127,7 +127,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS1-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS1: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] ; CHECK-VS1-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS1: [[WHILE_BODY]]: ; CHECK-VS1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -213,9 +213,9 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4 ; CHECK-VS2-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]] ; CHECK-VS2-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] -; CHECK-VS2-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC3]] ; CHECK-VS2-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS2-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 4 +; CHECK-VS2-NEXT: [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS2-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector [[BROADCAST_SPLATINSERT7]], poison, zeroinitializer ; CHECK-VS2-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -235,7 +235,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS2-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS2: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ] ; CHECK-VS2-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS2: [[WHILE_BODY]]: ; CHECK-VS2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -279,7 +279,7 @@ while.end: define void @trip_count_too_small(ptr nocapture noundef %p, i32 noundef %tc, i16 noundef %val) { ; CHECK-LABEL: define void @trip_count_too_small( -; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TC]], 3 ; CHECK-NEXT: br i1 [[CMP7]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]] @@ -428,9 +428,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll index 771dd00034c70..0ff98d2abe776 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll @@ -19,8 +19,8 @@ define i64 @mul_select_operand_known_1_via_scev() { ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[VEC_PHI]]) ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 0cea16d103678..3d4f7e0e4924b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -137,9 +137,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VSCALEFORTUNING2: scalar.ph: -; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP50]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; VSCALEFORTUNING2-NEXT: br label [[LOOP:%.*]] ; VSCALEFORTUNING2: loop: @@ -260,9 +260,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[TMP29]], i32 [[TMP47]] ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1]] ] +; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] ; PRED-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1]] ] ; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP44]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1]] ] ; PRED-NEXT: br label [[LOOP1:%.*]] ; PRED: loop: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index 41c92d6166332..a23d2d83998f7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -23,9 +23,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i32() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 6ecaff048ca3c..cb4fd04d1bc4f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -512,9 +512,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = insertelement splat (float -0.000000e+00), float [[A2]], i32 0 ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = insertelement splat (float -0.000000e+00), float [[A1]], i32 0 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -540,9 +540,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-UNORDERED: scalar.ph: -; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY]] ] ; CHECK-UNORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ] +; CHECK-UNORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-UNORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-UNORDERED: for.body: ; CHECK-UNORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] @@ -583,9 +583,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; CHECK-ORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-ORDERED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-ORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -607,9 +607,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED: scalar.ph: -; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY]] ] ; CHECK-ORDERED-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ] +; CHECK-ORDERED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-ORDERED-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED: for.body: ; CHECK-ORDERED-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] @@ -649,9 +649,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] @@ -684,9 +684,9 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[A2]], [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX2:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[A1]], [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 36af7eb57868b..b7eddd7fdbccf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -380,9 +380,9 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]] ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]] -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]] ; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 0761e4aab79e1..d42743a802280 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -16,10 +16,10 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to @@ -83,10 +83,10 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 1e310ca5fcaed..d91b467a44865 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -467,9 +467,9 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 512, [[TMP1]] -; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -553,9 +553,9 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP9]] -; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1248,9 +1248,9 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]] -; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 @@ -1339,10 +1339,10 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]] -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP6]], 3 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl [[TMP9]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP10]], splat (i64 3) @@ -1449,9 +1449,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP8]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNEG]] -; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 @@ -1492,9 +1492,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP33:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 64b69be5f5259..322f96f45d191 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -19,12 +19,12 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 2850aca918554..f7c67c2e79c34 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -215,9 +215,9 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP2]], [[TMP5]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 4b096e17a4fa0..881de8dc79823 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -25,10 +25,10 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 3 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[N_VEC]], 3 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP26]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -138,12 +138,12 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -229,12 +229,12 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -2 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], [[DOTNEG]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 5f151c260c8fc..d9fc9a48ba99e 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 ; RUN: opt -p loop-vectorize -S %s | FileCheck %s target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512" @@ -108,7 +108,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END27:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] @@ -117,41 +117,41 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF24]] -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]] +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND30:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT31:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI32:%.*]] = phi <2 x i64> [ [[TMP55]], %[[VEC_EPILOG_PH]] ], [ [[TMP56:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX29]], 0 -; CHECK-NEXT: [[NEXT_GEP33:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP57]] -; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[NEXT_GEP33]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i8>, ptr [[TMP58]], align 1 -; CHECK-NEXT: [[TMP59:%.*]] = zext <2 x i8> [[WIDE_LOAD34]] to <2 x i64> -; CHECK-NEXT: [[TMP60:%.*]] = shl <2 x i64> [[VEC_IND30]], splat (i64 1) -; CHECK-NEXT: [[TMP61:%.*]] = shl <2 x i64> [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP56]] = or <2 x i64> [[TMP61]], [[VEC_PHI32]] -; CHECK-NEXT: [[INDEX_NEXT35]] = add nuw i64 [[INDEX29]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT31]] = add <2 x i64> [[VEC_IND30]], splat (i64 2) -; CHECK-NEXT: [[TMP62:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC25]] -; CHECK-NEXT: br i1 [[TMP62]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[INDEX26:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT32:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND27:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT28:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI29:%.*]] = phi <2 x i64> [ [[TMP57]], %[[VEC_EPILOG_PH]] ], [ [[TMP58:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX26]], 0 +; CHECK-NEXT: [[NEXT_GEP30:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP59]] +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[NEXT_GEP30]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i8>, ptr [[TMP60]], align 1 +; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[WIDE_LOAD32]] to <2 x i64> +; CHECK-NEXT: [[TMP62:%.*]] = shl <2 x i64> [[VEC_IND27]], splat (i64 1) +; CHECK-NEXT: [[TMP63:%.*]] = shl <2 x i64> [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP58]] = or <2 x i64> [[TMP63]], [[VEC_PHI29]] +; CHECK-NEXT: [[INDEX_NEXT32]] = add nuw i64 [[INDEX26]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT28]] = add <2 x i64> [[VEC_IND27]], splat (i64 2) +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT32]], [[N_VEC25]] +; CHECK-NEXT: br i1 [[TMP54]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP56]]) -; CHECK-NEXT: [[CMP_N36:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]] -; CHECK-NEXT: br i1 [[CMP_N36]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP58]]) +; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]] +; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL28:%.*]] = phi ptr [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX29:%.*]] = phi i64 [ [[TMP54]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX34:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL35:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL26]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX29]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL28]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX34]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL35]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1 ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64 ; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1 @@ -164,7 +164,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP_I166_I]], i1 [[CMP2]], i1 false ; CHECK-NEXT: br i1 [[AND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ [[TMP54]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i64 [[RED_NEXT_LCSSA]], 0 ; CHECK-NEXT: ret i1 [[RES]] ; @@ -192,6 +192,8 @@ exit: ret i1 %res } ;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll index 130ef7542cfb4..f9c1ab4a81810 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blend-any-of-reduction-cost.ll @@ -100,8 +100,8 @@ define i32 @any_of_reduction_used_in_blend_with_mutliple_phis(ptr %src, i64 %N, ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[ANY_OF_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ANY_OF_RED_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 2e0eee1f86292..bcf1eee2c053b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -22,9 +22,9 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 [[TMP6]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP8]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP12]], splat (i64 3) @@ -117,9 +117,9 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 333, [[TMP5]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 3) @@ -212,9 +212,9 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 333, [[TMP5]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 3) @@ -317,9 +317,9 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 [[TMP6]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP8]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP12]], splat (i64 3) @@ -424,9 +424,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 333, [[TMP5]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], splat (i64 3) @@ -539,9 +539,9 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 [[TMP6]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP8]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP11:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = mul [[TMP12]], splat (i64 3) @@ -857,9 +857,9 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP15]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP19:%.*]] = add [[TMP18]], zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP19]], splat (i64 3) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 37cb5570c67f2..5c39511c50239 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -29,10 +29,10 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 3 -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8 +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3 +; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[START_EXT]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv8i64() @@ -112,9 +112,9 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 252, [[TMP6]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i32 4) @@ -336,9 +336,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 37, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 37, [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 9 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 9 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = xor [[BROADCAST_SPLAT]], splat (i32 -1) @@ -422,9 +422,9 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i64 2) @@ -437,24 +437,24 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP15]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i8( [[WIDE_VEC]]) -; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = zext [[TMP16]] to -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP17]], [[TMP18]], i32 4, splat (i1 true)) +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = zext [[TMP17]] to +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP19]], i32 4, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index dd2e75f1f5e21..58d6fd05241f2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -28,8 +28,8 @@ define i64 @pr97452_scalable_vf1_for(ptr %src) #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 3 ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index b303df7c3875a..1acf86336c1fd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -63,13 +63,13 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 [[TMP46]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[TMP48]] +; CHECK-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8 ; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[X_I64]], [[TMP49]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3 ; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]] -; CHECK-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP51]], 8 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[X_I64]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP53:%.*]] = call @llvm.stepvector.nxv8i64() @@ -93,11 +93,11 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X_I64]], %[[ENTRY]] ], [ [[X_I64]], %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL12:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[ENTRY]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL25:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[ENTRY]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_CONV:%.*]] = phi i32 [ [[BC_RESUME_VAL12]], %[[SCALAR_PH]] ], [ [[TMP64:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_CONV:%.*]] = phi i32 [ [[BC_RESUME_VAL25]], %[[SCALAR_PH]] ], [ [[TMP64:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_I64:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP61:%.*]] = load i64, ptr [[GEP_I64]], align 8 ; CHECK-NEXT: [[TMP62:%.*]] = sext i32 [[IV_CONV]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index ea376fecd5b76..c2f8d0b130e8c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -38,9 +38,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] -; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 ; RV32-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; RV32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 ; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV32-NEXT: [[TMP8:%.*]] = add [[TMP7]], zeroinitializer ; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP8]], splat (i64 16) @@ -118,9 +118,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] -; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 ; RV64-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; RV64-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 ; RV64-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV64-NEXT: [[TMP8:%.*]] = add [[TMP7]], zeroinitializer ; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP8]], splat (i64 16) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 822d5c8d16917..beb535ccfacf2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -87,9 +87,9 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i64 64) @@ -164,10 +164,10 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 8 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 8 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP18]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -330,7 +330,6 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -350,7 +349,7 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: ; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -609,7 +608,6 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; NOSTRIDED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -629,7 +627,7 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: ; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -734,12 +732,12 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]] ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]] ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; STRIDED-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]] -; STRIDED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index 209c251f6a6a4..a330b6964a660 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -20,10 +20,10 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] -; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] +; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -121,10 +121,10 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] -; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 ; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] +; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index 7efa65ed270ef..a2f85b9ed4ffe 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -25,9 +25,9 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], [[TMP7]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[SPEC_SELECT]], [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[SPEC_SELECT]], [[N_VEC]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index d0754f1c2bb55..7b0fa644ea001 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -66,8 +66,8 @@ define void @func_21() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 6, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 6, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 036ce3d4dd853..28cb8105f927d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -496,8 +496,8 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 { ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP30]], i1 false, i1 false ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ANY_OF:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANY_OF_NEXT:%.*]], [[LOOP]] ] @@ -987,8 +987,8 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 { ; CHECK-NEXT: store i32 [[TMP10]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 24, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 0e511cfc9bffe..6fc70802e82cb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -48,8 +48,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 3d0fe42635fe5..b6e9dfc2322ff 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -348,10 +348,10 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[ENTRY]] ], [ 2048, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index 597be33ca63f6..9e87cc29be4e8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -368,9 +368,9 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP3]], 63 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 64 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP3]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -72 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i32 [[TMP4]] -; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP3]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <64 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT3]], <64 x i32> poison, <64 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -414,9 +414,9 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP3]], 7 ; AUTOVF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8 ; AUTOVF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; AUTOVF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP3]], 1 ; AUTOVF-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -72 ; AUTOVF-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i32 [[TMP4]] -; AUTOVF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP3]], 1 ; AUTOVF-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <8 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; AUTOVF-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT3]], <8 x i32> poison, <8 x i32> zeroinitializer ; AUTOVF-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll index bb7fe4d4f1e56..270e6bcd9ab1f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr109581-unused-blend.ll @@ -91,8 +91,8 @@ define i32 @unused_blend_after_unrolling(ptr %p, i32 %a, i1 %c.1, i16 %x, i16 %y ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i16> [[PREDPHI]], i32 3 ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 97, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 97, %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: ; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP_LATCH:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index f0dfcf3eada72..2e98f457e7d54 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -86,9 +86,9 @@ define void @test(ptr %p) { ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ] ; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ] ; VEC-NEXT: br label [[FOR_BODY:%.*]] ; VEC: for.body: ; VEC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IDX:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index bcaedf724ee39..fae21f38adb81 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -112,16 +112,16 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: iter.check27: +; CHECK: iter.check23: ; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9 ; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK29:%.*]] -; CHECK: vector.main.loop.iter.check29: +; CHECK: vector.main.loop.iter.check25: ; CHECK-NEXT: [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH47:%.*]], label [[VECTOR_PH30:%.*]] -; CHECK: vector.ph30: +; CHECK: vector.ph26: ; CHECK-NEXT: [[N_MOD_VF31:%.*]] = urem i64 [[TMP28]], 16 ; CHECK-NEXT: [[N_VEC32:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF31]] ; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[N_VEC32]], 2 @@ -129,11 +129,11 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY35:%.*]] -; CHECK: vector.body35: -; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY35]] ] -; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY35]] ] -; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY35]] ] +; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]] +; CHECK: vector.body29: +; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ] +; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY29]] ] +; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] @@ -153,18 +153,18 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) ; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK24:%.*]], label [[VECTOR_BODY35]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block24: +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK20:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block20: ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK48:%.*]] -; CHECK: vec.epilog.iter.check48: +; CHECK: vec.epilog.iter.check42: ; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH47]] -; CHECK: vec.epilog.ph47: +; CHECK: vec.epilog.ph41: ; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] ; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] @@ -181,11 +181,11 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: [[BROADCAST_SPLATINSERT72:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY58:%.*]] -; CHECK: vec.epilog.vector.body58: -; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH47]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY58]] ] -; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY58]] ] -; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY58]] ] +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY50:%.*]] +; CHECK: vec.epilog.vector.body50: +; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH47]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] ; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] @@ -205,13 +205,13 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY58]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: vec.epilog.middle.block45: +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK39:%.*]], label [[VEC_EPILOG_VECTOR_BODY50]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: vec.epilog.middle.block39: ; CHECK-NEXT: [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH46]] -; CHECK: vec.epilog.scalar.ph46: -; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] -; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] +; CHECK: vec.epilog.scalar.ph40: +; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK39]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] +; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK39]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] ; CHECK: for.body.us: ; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH46]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 55ff26c55b512..5939790973021 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -142,8 +142,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_114]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT19]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY15:%.*]] -; CHECK: vector.body15: -; CHECK-NEXT: [[INDEX16:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT29:%.*]], [[PRED_STORE_CONTINUE28:%.*]] ] +; CHECK: vector.body14: +; CHECK-NEXT: [[INDEX16:%.*]] = phi i64 [ 0, [[VECTOR_PH9]] ], [ [[INDEX_NEXT29:%.*]], [[PRED_STORE_CONTINUE78:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX16]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX16]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT17]], <4 x i64> poison, <4 x i32> zeroinitializer @@ -151,7 +151,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT20]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i64 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] -; CHECK: pred.store.if21: +; CHECK: pred.store.if20: ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 [[OFFSET_IDX]] @@ -160,10 +160,10 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP23]], [[TMP21]] ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] -; CHECK: pred.store.continue22: +; CHECK: pred.store.continue21: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP18]], i64 1 ; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] -; CHECK: pred.store.if23: +; CHECK: pred.store.if22: ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 @@ -173,10 +173,10 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP31]], [[TMP29]] ; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] -; CHECK: pred.store.continue24: +; CHECK: pred.store.continue23: ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP18]], i64 2 ; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] -; CHECK: pred.store.if25: +; CHECK: pred.store.if24: ; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 @@ -186,10 +186,10 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP39]], [[TMP37]] ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] -; CHECK: pred.store.continue26: +; CHECK: pred.store.continue25: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3 -; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28]] -; CHECK: pred.store.if27: +; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE78]] +; CHECK: pred.store.if26: ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 @@ -198,8 +198,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 [[TMP43]] ; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] -; CHECK: pred.store.continue28: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE78]] +; CHECK: pred.store.continue27: ; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 4 ; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC12]] ; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY15]], !llvm.loop [[LOOP5:![0-9]+]] @@ -273,7 +273,7 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT11]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE18:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 @@ -291,7 +291,7 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] -; CHECK: pred.store.if13: +; CHECK: pred.store.if11: ; CHECK-NEXT: [[TMP7:%.*]] = or disjoint i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = or disjoint i64 [[OFFSET_IDX6]], 4 @@ -299,29 +299,29 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16 ; CHECK-NEXT: store i32 [[TMP9]], ptr [[NEXT_GEP3]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] -; CHECK: pred.store.continue14: +; CHECK: pred.store.continue12: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] -; CHECK: pred.store.if15: +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] +; CHECK: pred.store.if13: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[OFFSET_IDX6]], 8 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16 ; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP4]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] -; CHECK: pred.store.continue16: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] +; CHECK: pred.store.continue14: ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18]] -; CHECK: pred.store.if17: +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.if15: ; CHECK-NEXT: [[TMP15:%.*]] = or disjoint i64 [[OFFSET_IDX]], 12 ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP16:%.*]] = or disjoint i64 [[OFFSET_IDX6]], 12 ; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16 ; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP5]], align 16 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] -; CHECK: pred.store.continue18: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] +; CHECK: pred.store.continue16: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -471,7 +471,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] -; CHECK: pred.store.if10: +; CHECK: pred.store.if9: ; CHECK-NEXT: [[TMP7:%.*]] = or disjoint i64 [[OFFSET_IDX5]], 4 ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = or disjoint i64 [[OFFSET_IDX]], 2 @@ -481,10 +481,10 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7 ; CHECK-NEXT: store i32 [[TMP11]], ptr [[NEXT_GEP7]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE11]] -; CHECK: pred.store.continue11: +; CHECK: pred.store.continue10: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] -; CHECK: pred.store.if12: +; CHECK: pred.store.if11: ; CHECK-NEXT: [[TMP13:%.*]] = or disjoint i64 [[OFFSET_IDX5]], 8 ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = or disjoint i64 [[OFFSET_IDX]], 4 @@ -494,10 +494,10 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7 ; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE13]] -; CHECK: pred.store.continue13: +; CHECK: pred.store.continue12: ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 ; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]] -; CHECK: pred.store.if14: +; CHECK: pred.store.if13: ; CHECK-NEXT: [[TMP19:%.*]] = or disjoint i64 [[OFFSET_IDX5]], 12 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP20:%.*]] = or disjoint i64 [[OFFSET_IDX]], 6 @@ -507,7 +507,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7 ; CHECK-NEXT: store i32 [[TMP23]], ptr [[NEXT_GEP9]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] -; CHECK: pred.store.continue15: +; CHECK: pred.store.continue14: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 6e7efe018faf6..e11f77d8aeaec 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -33,7 +33,7 @@ ; CHECK: br i1 {{.+}}, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !prof [[PROF_F0_VEC_EPILOG_VECTOR_BODY:![0-9]+]] ; ; CHECK: vec.epilog.middle.block: -; CHECK: br i1 %cmp.n9, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] +; CHECK: br i1 %cmp.n{{.+}}, label %exit.loopexit, label %vec.epilog.scalar.ph, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]] ; ; CHECK: vec.epilog.scalar.ph: ; CHECK: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index d5b3e80b80b23..23cb7bbc267e5 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -241,8 +241,8 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i1 [[BC_MERGE_RDX]], false ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 @@ -275,8 +275,8 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ false, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -426,8 +426,8 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i1 [ [[RDX_SELECT22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ true, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i1 [ [[RDX_SELECT22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ true, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RED:%.*]] = phi i1 [ [[BC_MERGE_RDX23]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index 7aedb218e1352..fc71f8a934047 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -661,10 +661,10 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 true, label %End, label %scalar.ph ; CHECK: scalar.ph: -; CHECK-NEXT: %bc.resume.val = phi i64 [ 0, %middle.block ], [ 0, %Entry ] ; CHECK-NEXT: phi double [ [[TMP0]], %middle.block ], [ 0.000000e+00, %Entry ] ; CHECK-NEXT: phi double [ [[TMP3]], %middle.block ], [ 0.000000e+00, %Entry ] ; CHECK-NEXT: phi double [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ], [ 0.000000e+00, %Entry ] +; CHECK-NEXT: %bc.resume.val = phi i64 [ 0, %middle.block ], [ 0, %Entry ] ; CHECK: End: ; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP0]], %middle.block ] ; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP3]], %middle.block ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index bd3688d852d0f..a97fcc9f437c4 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -41,8 +41,8 @@ define void @can_sink_after_store(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ] @@ -121,8 +121,8 @@ define void @sink_sdiv(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ] @@ -202,8 +202,8 @@ define void @can_sink_with_additional_user(i32 %x, ptr %ptr, i64 %tc) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ] @@ -387,9 +387,9 @@ define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[BB74:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[BB]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; CHECK-NEXT: br label [[BB13:%.*]] ; CHECK: bb13: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] @@ -463,9 +463,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] @@ -554,9 +554,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] @@ -852,8 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -935,8 +935,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -1057,9 +1057,9 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] @@ -1125,9 +1125,9 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 2d50f8219549a..7f562a4f2c445 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -55,8 +55,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ] @@ -111,8 +111,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ] @@ -165,8 +165,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ] @@ -265,8 +265,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup.loopexit: @@ -334,8 +334,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: for.cond.cleanup.loopexit: @@ -398,8 +398,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: for.cond.cleanup.loopexit: @@ -525,8 +525,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[SCALAR_BODY]] ] @@ -598,8 +598,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ] @@ -669,8 +669,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[TMP14:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ] @@ -912,8 +912,8 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: ; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] @@ -956,8 +956,8 @@ define i32 @PR27246() { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] @@ -1005,8 +1005,8 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]] ; SINK-AFTER: for.cond.cleanup: ; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] @@ -1780,8 +1780,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] @@ -1836,8 +1836,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] @@ -1888,8 +1888,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[TMP11:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] @@ -2016,8 +2016,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP30]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP30]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP48:%.*]], [[FOR_BODY]] ] @@ -2076,8 +2076,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] @@ -2143,8 +2143,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[TMP25:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ] @@ -2240,8 +2240,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ] @@ -2299,8 +2299,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ] @@ -2353,8 +2353,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[TMP12:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] @@ -2685,8 +2685,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2816,8 +2816,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1 ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ] @@ -2881,8 +2881,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3 ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2990,8 +2990,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7 ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -3189,8 +3189,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1 ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] @@ -3273,8 +3273,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3 ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] -; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; SINK-AFTER-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 +; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]] ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index e0b1c5f2d861b..2797f9964e689 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2773,8 +2773,8 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -2844,8 +2844,8 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -2910,8 +2910,8 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -2981,8 +2981,8 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -3051,8 +3051,8 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -3122,8 +3122,8 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -5561,9 +5561,9 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -5781,9 +5781,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD7]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: exit: @@ -6236,9 +6236,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6307,9 +6307,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6382,9 +6382,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6463,9 +6463,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6538,9 +6538,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] -; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 16ba3b26e68ae..e8ae770ff4c0f 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1508,9 +1508,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index 689d00e1d0186..d26362f0c162e 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -401,8 +401,8 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[FOR_BODY3]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll index d8b6a4e758851..95ae2de117666 100644 --- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll @@ -52,13 +52,13 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK: vector.ph7: ; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF8]] -; CHECK-NEXT: br label [[VECTOR_BODY11:%.*]] -; CHECK: vector.body9: -; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ 0, [[VECTOR_PH7]] ], [ [[INDEX_NEXT13:%.*]], [[VECTOR_BODY11]] ] +; CHECK-NEXT: br label [[VECTOR_BODY10:%.*]] +; CHECK: vector.body10: +; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ 0, [[VECTOR_PH7]] ], [ [[INDEX_NEXT13:%.*]], [[VECTOR_BODY10]] ] ; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !4, !noalias !7 ; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX12]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK4:%.*]], label [[VECTOR_BODY11]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK4:%.*]], label [[VECTOR_BODY10]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block4: ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[LOOP_3_LR_PH:%.*]], label [[SCALAR_PH5]] @@ -69,8 +69,8 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[IDXPROM_I_I61:%.*]] = and i64 [[IV761_LCSSA]], 1 ; CHECK-NEXT: [[ARRAYIDX_I_I62:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[IDXPROM_I_I61]] ; CHECK-NEXT: [[MIN_ITERS_CHECK22:%.*]] = icmp ult i64 [[TMP3]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK22]], label [[SCALAR_PH21:%.*]], label [[VECTOR_MEMCHECK14:%.*]] -; CHECK: vector.memcheck14: +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK22]], label [[SCALAR_PH22:%.*]], label [[VECTOR_MEMCHECK15:%.*]] +; CHECK: vector.memcheck15: ; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[IDXPROM_I_I61]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP12]], 4 @@ -78,22 +78,22 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[BOUND017:%.*]] = icmp ult ptr [[TMP1]], [[SCEVGEP16]] ; CHECK-NEXT: [[BOUND118:%.*]] = icmp ult ptr [[ARRAYIDX_I_I62]], [[SCEVGEP15]] ; CHECK-NEXT: [[FOUND_CONFLICT19:%.*]] = and i1 [[BOUND017]], [[BOUND118]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT19]], label [[SCALAR_PH21]], label [[VECTOR_PH23:%.*]] -; CHECK: vector.ph23: +; CHECK-NEXT: br i1 [[FOUND_CONFLICT19]], label [[SCALAR_PH22]], label [[VECTOR_PH24:%.*]] +; CHECK: vector.ph24: ; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF24]] -; CHECK-NEXT: br label [[VECTOR_BODY26:%.*]] -; CHECK: vector.body26: -; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ 0, [[VECTOR_PH23]] ], [ [[INDEX_NEXT29:%.*]], [[VECTOR_BODY26]] ] +; CHECK-NEXT: br label [[VECTOR_BODY27:%.*]] +; CHECK: vector.body27: +; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ 0, [[VECTOR_PH24]] ], [ [[INDEX_NEXT29:%.*]], [[VECTOR_BODY27]] ] ; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !alias.scope !10, !noalias !13 ; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX29]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC25]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK20:%.*]], label [[VECTOR_BODY26]], !llvm.loop [[LOOP15:![0-9]+]] -; CHECK: middle.block20: +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK21:%.*]], label [[VECTOR_BODY27]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: middle.block21: ; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC25]] -; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH21]] -; CHECK: scalar.ph21: -; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK20]] ], [ 0, [[LOOP_3_LR_PH]] ], [ 0, [[VECTOR_MEMCHECK14]] ] +; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH22]] +; CHECK: scalar.ph22: +; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK21]] ], [ 0, [[LOOP_3_LR_PH]] ], [ 0, [[VECTOR_MEMCHECK15]] ] ; CHECK-NEXT: br label [[LOOP_3:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV846:%.*]] = phi i64 [ [[IV_NEXT85:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL13]], [[SCALAR_PH5]] ] @@ -105,7 +105,7 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: [[EXITCOND92_NOT:%.*]] = icmp eq i64 [[IV846]], [[IV]] ; CHECK-NEXT: br i1 [[EXITCOND92_NOT]], label [[LOOP_3_LR_PH]], label [[LOOP_2]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: loop.3: -; CHECK-NEXT: [[IV932:%.*]] = phi i64 [ [[BC_RESUME_VAL26]], [[SCALAR_PH21]] ], [ [[IV_NEXT94:%.*]], [[LOOP_3]] ] +; CHECK-NEXT: [[IV932:%.*]] = phi i64 [ [[BC_RESUME_VAL26]], [[SCALAR_PH22]] ], [ [[IV_NEXT94:%.*]], [[LOOP_3]] ] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_I_I62]], align 4 ; CHECK-NEXT: [[ARRAYIDX_I_I653:%.*]] = getelementptr i32, ptr [[TMP2:%.*]], i64 [[IV93:%.*]] ; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll index 50e18070a5c3f..24b9441749ee4 100644 --- a/llvm/test/Transforms/LoopVectorize/pr66616.ll +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -46,7 +46,7 @@ define void @pr66616(ptr %ptr) { ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[ADD3_LCSSA]], [[DOTCAST]] ; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY7:%.*]] -; CHECK: vector.body5: +; CHECK: vector.body4: ; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY7]] ] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX8]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-align.ll b/llvm/test/Transforms/LoopVectorize/reduction-align.ll index 8eef1ed90c0c9..01a71d39e1773 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-align.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-align.ll @@ -40,8 +40,8 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[HEIGHT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 240421341626a..ad2f9c608f204 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -733,8 +733,8 @@ define i32 @cond-uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[RDX1:%.*]] = phi i32 [ [[ADD2:%.*]], [[IF_END:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] @@ -897,8 +897,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP49]], [[MIDDLE_BLOCK]] ], [ 2.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[RDX1:%.*]] = phi float [ [[RES:%.*]], [[FOR_INC:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] @@ -1039,8 +1039,8 @@ define i32 @uncond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond, ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP29]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[RES:%.*]], [[FOR_INC:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] @@ -1168,8 +1168,8 @@ define i32 @uncond_cond_uncond(ptr noalias %src1, ptr noalias %src2, ptr noalias ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP30]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[ADD3:%.*]], [[IF_END:%.*]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 94fce866785db..f136b0e2e0b31 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -185,10 +185,10 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], splat (i32 1) ; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP5]] to i32 @@ -262,11 +262,11 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[DOTCAST]], 2.000000e+00 ; CHECK-NEXT: [[IND_END:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv4i32() ; CHECK-NEXT: [[TMP8:%.*]] = uitofp [[TMP7]] to ; CHECK-NEXT: [[TMP9:%.*]] = fmul [[TMP8]], splat (float 2.000000e+00) diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 5e28192997695..836115f381382 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -17,8 +17,8 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[EXTRA_ITER]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[EXTRA_ITER]], [[N_VEC]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[EXTRA_ITER]], 1 +; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[EXTRA_ITER]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -39,8 +39,8 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[EXTRA_ITER]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[NEXT:%.*]] = phi i32 [ [[SEL:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll index 40b007eff8ff8..57bc7b8337249 100644 --- a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll @@ -19,8 +19,8 @@ define void @pr75298_store_reduction_value_in_folded_loop(i64 %iv.start) optsize ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]] ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 32e55bff94b3e..9f0fbb9b346af 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -39,10 +39,11 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -106,10 +107,11 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %for.body.preheader ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -171,11 +173,12 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -234,11 +237,12 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK-NEXT: IR %red = phi float [ %red.next, %for.body ], [ 0.000000e+00, %entry ] ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors @@ -320,10 +324,11 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] +; CHECK-NEXT: IR %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK-NEXT: IR %cmp = icmp ult i64 %i, 5 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -366,6 +371,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: Live-in ir<256> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[IV_END:%.+]]> = DERIVED-IV ir<0> + vp<[[VTC]]> * ir<4> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -397,10 +403,11 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %cmp = icmp slt i64 %iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -476,12 +483,13 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] -; CHECK-NEXT: IR %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) +; CHECK-NEXT: IR %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] (extra operand: vp<[[RED_RESUME]]> from scalar.ph) ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -567,10 +575,11 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %cmp1 = icmp slt i32 %lsd, 100 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -627,6 +636,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[IV_END:%.+]]> = DERIVED-IV ir<0> + vp<[[VTC]]> * vp<[[EXP_SCEV]]> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -652,10 +662,11 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %iv.next = add i64 %iv, %inc ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -716,10 +727,11 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %ec = icmp eq i32 %iv.next, 1000 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -780,6 +792,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -845,10 +858,11 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -930,10 +944,11 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %ifcond = fcmp oeq float %ld.value, 5.0 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -1004,10 +1019,11 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %n ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -1112,11 +1128,12 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors ; CHECK-EMPTY: From 6440a9139d76058577b2b48cd1d667aeb642523d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 12 Dec 2024 16:12:22 +0000 Subject: [PATCH 02/17] !fixup address first set of comments, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 127 +++++++++++------- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 - .../AArch64/low_trip_count_predicates.ll | 2 + .../LoopVectorize/X86/small-size.ll | 2 + .../epilog-vectorization-any-of-reductions.ll | 4 +- .../iv-select-cmp-nested-loop.ll | 6 +- .../optimal-epilog-vectorization.ll | 5 +- 7 files changed, 95 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2bb95bf4e97b6..298a4b3f4e02b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -517,14 +517,15 @@ class InnerLoopVectorizer { /// Fix the non-induction PHIs in \p Plan. void fixNonInductionPHIs(VPTransformState &State); - /// Create the bypass resume value coming from the additional bypass block. \p - /// Step is the SCEV-expanded induction step to use. \p MainVectorTripCount - /// provides the trip count of the main vector loop, used to compute the - /// resume value reaching the scalar loop preheader directly from this - /// additional bypass block. - void createInductionBypassValue(PHINode *OrigPhi, - const InductionDescriptor &ID, Value *Step, - Value *MainVectorTripCount); + /// Create and record the bypass resume value for an induction Phi coming from + /// the additional bypass block. \p Step is the SCEV-expanded induction step + /// to use. \p MainVectorTripCount provides the trip count of the main vector + /// loop, used to compute the resume value reaching the scalar loop preheader + /// directly from this additional bypass block. + void createInductionAdditionalBypassValue(PHINode *OrigPhi, + const InductionDescriptor &ID, + Value *Step, + Value *MainVectorTripCount); /// Returns the original loop trip count. Value *getTripCount() const { return TripCount; } @@ -581,10 +582,10 @@ class InnerLoopVectorizer { /// vector loop preheader, middle block and scalar preheader. void createVectorLoopSkeleton(StringRef Prefix); - /// Create values for the induction variables to resume iteration count - /// in the bypass block. - void createInductionBypassValues(const SCEV2ValueTy &ExpandedSCEVs, - Value *MainVectorTripCount); + /// Create and record the values for induction variables to resume coming from + /// the additional bypass block. + void createInductionAdditionalBypassValues(const SCEV2ValueTy &ExpandedSCEVs, + Value *MainVectorTripCount); /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. @@ -2604,10 +2605,12 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { nullptr, Twine(Prefix) + "scalar.ph"); } -void InnerLoopVectorizer::createInductionBypassValue( +void InnerLoopVectorizer::createInductionAdditionalBypassValue( PHINode *OrigPhi, const InductionDescriptor &II, Value *Step, Value *MainVectorTripCount) { Instruction *OldInduction = Legal->getPrimaryInduction(); + // For the primary induction the additional bypass end value is known. + // Otherwise it is computed. Value *EndValueFromAdditionalBypass = MainVectorTripCount; if (OrigPhi != OldInduction) { IRBuilder<> B(LoopVectorPreHeader->getTerminator()); @@ -2616,7 +2619,7 @@ void InnerLoopVectorizer::createInductionBypassValue( if (isa_and_nonnull(II.getInductionBinOp())) B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); - // Compute the end value for the additional bypass (if applicable). + // Compute the end value for the additional bypass. if (MainVectorTripCount) { B.SetInsertPoint(getAdditionalBypassBlock(), getAdditionalBypassBlock()->getFirstInsertionPt()); @@ -2672,15 +2675,15 @@ static void addFullyUnrolledInstructionsToIgnore( } } -void InnerLoopVectorizer::createInductionBypassValues( +void InnerLoopVectorizer::createInductionAdditionalBypassValues( const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) { assert(MainVectorTripCount && "Must have bypass information"); for (const auto &InductionEntry : Legal->getInductionVars()) { PHINode *OrigPhi = InductionEntry.first; const InductionDescriptor &II = InductionEntry.second; - createInductionBypassValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), - MainVectorTripCount); + createInductionAdditionalBypassValue( + OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), MainVectorTripCount); } } @@ -2741,9 +2744,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton( // faster. emitMemRuntimeChecks(LoopScalarPreHeader); - Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); - assert(VectorTripCount && "Expected valid arguments"); - return LoopVectorPreHeader; } @@ -7736,8 +7736,9 @@ DenseMap LoopVectorizationPlanner::executePlan( //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - BestVPlan.prepareToExecute(ILV.getTripCount(), - ILV.getOrCreateVectorTripCount(nullptr), State); + BestVPlan.prepareToExecute( + ILV.getTripCount(), + ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State); BestVPlan.execute(&State); @@ -7844,8 +7845,6 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton( // Generate the induction variable. EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); - createInductionResumeVPValues(ExpandedSCEVs, nullptr, &WideIVs); - return LoopVectorPreHeader; } @@ -8010,14 +8009,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( Phi->removeIncomingValue(EPI.MemSafetyCheck); } - // Generate induction resume values. These variables save the new starting - // indexes for the scalar loop. They are used to test if there are any tail - // iterations left once the vector loop has completed. - // Note that when the vectorized epilogue is skipped due to iteration count - // check, then the resume value for the induction variable comes from - // the trip count of the main vector loop, passed as the second argument. - createInductionResumeVPValues(ExpandedSCEVs, EPI.VectorTripCount); - + // Generate bypass values from the additional bypass block. Note that when the + // vectorized epilogue is skipped due to iteration count check, then the + // resume value for the induction variable comes from the trip count of the + // main vector loop, passed as the second argument. + createInductionAdditionalBypassValues(ExpandedSCEVs, EPI.VectorTripCount); return LoopVectorPreHeader; } @@ -8822,30 +8818,33 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } +/// Create a ResumePhi for \p PhiR, if it is wide induction recipe. If the +/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute +/// the end value of the induction. static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, - VPBuilder &Builder, + VPBuilder &VectorPHBuilder, VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) { PHINode *OrigPhi; const InductionDescriptor *ID; - VPValue *Start; + VPValue *Start = PhiR->getStartValue(); VPValue *Step; Type *ScalarTy; bool IsCanonical = false; if (auto *WideIV = dyn_cast(PhiR)) { + // Truncated wide inductions resume from the last lane of their vector value + // in the last vector iteration. if (WideIV->getTruncInst()) return nullptr; OrigPhi = cast(WideIV->getUnderlyingValue()); ID = &WideIV->getInductionDescriptor(); - Start = WideIV->getStartValue(); Step = WideIV->getStepValue(); ScalarTy = WideIV->getScalarType(); IsCanonical = WideIV->isCanonical(); } else if (auto *WideIV = dyn_cast(PhiR)) { OrigPhi = cast(WideIV->getUnderlyingValue()); ID = &WideIV->getInductionDescriptor(); - Start = WideIV->getStartValue(); Step = WideIV->getOperand(1); ScalarTy = Start->getLiveInIRValue()->getType(); } else { @@ -8854,14 +8853,17 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, VPValue *EndValue = VectorTC; if (!IsCanonical) { - EndValue = Builder.createDerivedIV( + EndValue = VectorPHBuilder.createDerivedIV( ID->getKind(), dyn_cast_or_null(ID->getInductionBinOp()), Start, VectorTC, Step); } + // EndValue is based on the vector trip count (which has the same type as the + // widest induction) and thus may be wider than the induction here. if (ScalarTy != TypeInfo.inferScalarType(EndValue)) { - EndValue = Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy); + EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue, + ScalarTy); } auto *ResumePhiRecipe = @@ -8870,10 +8872,12 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, return ResumePhiRecipe; } -/// Create resume phis in the scalar preheader for first-order recurrences and -/// reductions and update the VPIRInstructions wrapping the original phis in the -/// scalar header. -static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { +/// Create resume phis in the scalar preheader for first-order recurrences, +/// reductions and inductions, and update the VPIRInstructions wrapping the +/// original phis in the scalar header. +static void addScalarResumePhis( + VPlan &Plan, + function_ref GetHeaderPhiRecipe) { VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); auto *ScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); @@ -8888,7 +8892,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { auto *ScalarPhiI = dyn_cast(&ScalarPhiIRI->getInstruction()); if (!ScalarPhiI) break; - auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); + auto *VectorPhiR = GetHeaderPhiRecipe(ScalarPhiI); if (VPValue *ResumePhi = addResumeValuesForInduction( VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, @@ -9277,7 +9281,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanTransforms::handleUncountableEarlyExit( *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); } - addScalarResumePhis(RecipeBuilder, *Plan); + addScalarResumePhis(*Plan, [&RecipeBuilder](PHINode *P) { + return cast(RecipeBuilder.getRecipe(P)); + }); SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); @@ -9399,6 +9405,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { bool HasNUW = true; addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DebugLoc()); + + addScalarResumePhis( + *Plan, + [&Plan](PHINode *P) { + return find_singleton( + Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis(), + [P](VPRecipeBase &R, bool) -> VPHeaderPHIRecipe * { + auto *HeaderR = cast(&R); + return HeaderR->getUnderlyingValue() == P ? HeaderR : nullptr; + }); + } + + ); + assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; } @@ -10490,7 +10510,24 @@ bool LoopVectorizePass::processLoop(Loop *L) { Constant::getNullValue(IRI->getType()))); ResumePhi->eraseFromParent(); } - VPlanTransforms::removeDeadRecipes(*BestMainPlan); + // VPlanTransforms::removeDeadRecipes(*BestMainPlan); + + using namespace VPlanPatternMatch; + VPBasicBlock *ScalarPHVPBB = BestMainPlan->getScalarPreheader(); + VPValue *VectorTC = &BestMainPlan->getVectorTripCount(); + if (none_of(*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) { + return match(&R, m_VPInstruction( + m_Specific(VectorTC), m_SpecificInt(0))); + })) { + VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin()); + // When vectorizing the epilogue, create a resume phi for the + // canonical IV if no suitable resume phi was already created. + ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, + {VectorTC, BestMainPlan->getOrAddLiveIn(ConstantInt::get( + LVL.getWidestInductionType(), 0))}, + {}, "vec.epilog.resume.val"); + } auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e1d002fb726c2..09e7851f41b1e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -65,7 +65,6 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPInstruction::FirstOrderRecurrenceSplice: case VPInstruction::LogicalAnd: case VPInstruction::PtrAdd: - case VPInstruction::ResumePhi: return false; default: return true; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 27cf432f0b45c..a48952ab0e2d0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -74,6 +74,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS1-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS1-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 +; CHECK-VS1-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-VS1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -182,6 +183,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 +; CHECK-VS2-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-VS2-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index ca55e7b30f88a..6d37953448213 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -151,6 +151,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT20]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i64 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] +; CHECK: pred.store.if20: +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [2048 x i32], ptr @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [2048 x i32], ptr @c, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 87301128442ed..9cc21d8631d86 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -241,9 +241,9 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i1 [[BC_MERGE_RDX]], false ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] @@ -331,6 +331,8 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP50]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll index 07ee5892dc28e..681ffe946d17d 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll @@ -40,8 +40,8 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) { ; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOP_EXIT]], label %[[SCALAR_PH]] ; CHECK-VF4IC1: [[SCALAR_PH]]: -; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ] ; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_OUTER]], %[[OUTER_LOOP]] ] +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ] ; CHECK-VF4IC1-NEXT: br label %[[INNER_LOOP:.*]] ; CHECK-VF4IC1: [[INNER_LOOP]]: ; CHECK-VF4IC1-NEXT: [[RDX_INNER:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SELECT:%.*]], %[[INNER_LOOP]] ] @@ -114,8 +114,8 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOP_EXIT]], label %[[SCALAR_PH]] ; CHECK-VF4IC4: [[SCALAR_PH]]: -; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ] ; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_OUTER]], %[[OUTER_LOOP]] ] +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ] ; CHECK-VF4IC4-NEXT: br label %[[INNER_LOOP:.*]] ; CHECK-VF4IC4: [[INNER_LOOP]]: ; CHECK-VF4IC4-NEXT: [[RDX_INNER:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SELECT:%.*]], %[[INNER_LOOP]] ] @@ -189,8 +189,8 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) { ; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[OUTER_LOOP_EXIT]], label %[[SCALAR_PH]] ; CHECK-VF1IC4: [[SCALAR_PH]]: -; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ] ; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_OUTER]], %[[OUTER_LOOP]] ] +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_LOOP]] ] ; CHECK-VF1IC4-NEXT: br label %[[INNER_LOOP:.*]] ; CHECK-VF1IC4: [[INNER_LOOP]]: ; CHECK-VF1IC4-NEXT: [[RDX_INNER:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SELECT:%.*]], %[[INNER_LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 9f5e5f3980e6f..978c252b3f992 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -175,6 +175,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -500,8 +501,8 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]] ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]] ; CHECK-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer @@ -590,8 +591,8 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.ph: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer From 4fd2d0e3ad50957097b1b7d2bccd5b3070c1b41c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 12 Dec 2024 20:36:25 +0000 Subject: [PATCH 03/17] !fixup address latest comments, thanks! --- .../Transforms/Vectorize/LoopVectorize.cpp | 116 ++++++++++-------- 1 file changed, 64 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c4c2677382534..215316b71eeeb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8918,8 +8918,12 @@ static void addScalarResumePhis( ScalarPhiIRI->addOperand(ResumePhi); continue; } - if (!isa(VectorPhiR)) + if (!isa( + VectorPhiR)) { + assert(cast(VectorPhiR)->getTruncInst() && + "should only skip truncated wide inductions"); continue; + } // The backedge value provides the value to resume coming out of a loop, // which for FORs is a vector whose last element needs to be extracted. The // start value provides the value if the loop is bypassed. @@ -10031,6 +10035,63 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced || !EnableLoopVectorization) {} +/// Prepare \p MainPlan for vectorizing the main vector loop during epilogue +/// vectorization. Remove ResumePhis from \p MainPlan for inductions if they +/// don't have a corresponding wide induction in \p EpiPlan. +static void preparePlanForMainVectorLoop( + VPlan &MainPlan, VPlan &EpiPlan, + const MapVector &Inductions) { + // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those + // will need their resume-values computed from the main vector loop. Others + // can be removed in the main VPlan. + SmallPtrSet WidenedPhis; + for (VPRecipeBase &R : + EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + if (!isa(&R)) + continue; + if (isa(&R)) + WidenedPhis.insert(cast(&R)->getPHINode()); + else + WidenedPhis.insert( + cast(R.getVPSingleValue()->getUnderlyingValue())); + } + for (VPRecipeBase &R : *cast(MainPlan.getScalarHeader())) { + auto *VPIRInst = cast(&R); + auto *IRI = dyn_cast(&VPIRInst->getInstruction()); + if (!IRI) + break; + if (WidenedPhis.contains(IRI) || !Inductions.contains(IRI)) + continue; + // There is no corresponding wide induction in the epilogue plan that would + // need a resume value. Set the operand in VPIRInst to zero, so ResumePhi + // can be removed. The resume values for the scalar loop will be created + // during execution of EpiPlan. + VPRecipeBase *ResumePhi = VPIRInst->getOperand(0)->getDefiningRecipe(); + VPIRInst->setOperand( + 0, MainPlan.getOrAddLiveIn(Constant::getNullValue(IRI->getType()))); + ResumePhi->eraseFromParent(); + } + + using namespace VPlanPatternMatch; + VPBasicBlock *ScalarPHVPBB = MainPlan.getScalarPreheader(); + VPValue *VectorTC = &MainPlan.getVectorTripCount(); + // If there is no suitable resume value for the canonical induction in the + // epilogue loop, create it. + if (none_of(*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) { + return match(&R, m_VPInstruction( + m_Specific(VectorTC), m_SpecificInt(0))); + })) { + VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin()); + // When vectorizing the epilogue, create a resume phi for the + // canonical IV if no suitable resume phi was already created. + ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, + {VectorTC, MainPlan.getOrAddLiveIn(ConstantInt::get( + MainPlan.getCanonicalIV()->getScalarType(), 0))}, + {}, "vec.epilog.resume.val"); + } +} + /// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded /// SCEVs from \p ExpandedSCEVs and set resume values for header recipes. static void @@ -10491,62 +10552,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { // to be vectorized by executing the plan (potentially with a different // factor) again shortly afterwards. VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width); + preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan, + LVL.getInductionVars()); EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1, BestEpiPlan); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks, *BestMainPlan); - - // Collect PHI nodes of wide inductions in the VPlan for the epilogue. - // Those will need their resume-values computed from the main vector - // loop. Others can be removed in the main VPlan. - SmallPtrSet WidenedPhis; - for (VPRecipeBase &R : - BestEpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (!isa(&R)) - continue; - if (isa(&R)) - WidenedPhis.insert( - cast(&R)->getPHINode()); - else - WidenedPhis.insert( - cast(R.getVPSingleValue()->getUnderlyingValue())); - } - for (VPRecipeBase &R : - *cast(BestMainPlan->getScalarHeader())) { - auto *VPIRInst = cast(&R); - auto *IRI = dyn_cast(&VPIRInst->getInstruction()); - if (!IRI) - break; - if (WidenedPhis.contains(IRI) || - !LVL.getInductionVars().contains(IRI)) - continue; - VPRecipeBase *ResumePhi = - VPIRInst->getOperand(0)->getDefiningRecipe(); - VPIRInst->setOperand(0, BestMainPlan->getOrAddLiveIn( - Constant::getNullValue(IRI->getType()))); - ResumePhi->eraseFromParent(); - } - // VPlanTransforms::removeDeadRecipes(*BestMainPlan); - - using namespace VPlanPatternMatch; - VPBasicBlock *ScalarPHVPBB = BestMainPlan->getScalarPreheader(); - VPValue *VectorTC = &BestMainPlan->getVectorTripCount(); - if (none_of(*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) { - return match(&R, m_VPInstruction( - m_Specific(VectorTC), m_SpecificInt(0))); - })) { - VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin()); - // When vectorizing the epilogue, create a resume phi for the - // canonical IV if no suitable resume phi was already created. - ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, - {VectorTC, BestMainPlan->getOrAddLiveIn(ConstantInt::get( - LVL.getWidestInductionType(), 0))}, - {}, "vec.epilog.resume.val"); - } - auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; From be373bf62ab02ba492887f7b102c724326cfe573 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 12 Dec 2024 21:12:26 +0000 Subject: [PATCH 04/17] !fixup simplify derived IV --- .../Transforms/Vectorize/LoopVectorize.cpp | 7 ++-- .../Transforms/Vectorize/VPlanPatternMatch.h | 16 ++++++++- .../Transforms/Vectorize/VPlanTransforms.cpp | 3 ++ .../epilog-vectorization-widen-inductions.ll | 4 +-- .../X86/epilog-vectorization-inductions.ll | 8 ++--- .../LoopVectorize/X86/gather_scatter.ll | 12 ++++--- .../LoopVectorize/X86/scatter_crash.ll | 34 ++++++++++--------- .../LoopVectorize/vplan-iv-transforms.ll | 3 +- .../vplan-printing-before-execute.ll | 16 +++++---- 9 files changed, 65 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 215316b71eeeb..0d3129be97fc2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9720,12 +9720,13 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags()); Value *Step = State.get(getStepValue(), VPLane(0)); - Value *CanonicalIV = State.get(getOperand(1), VPLane(0)); + Value *Index = State.get(getOperand(1), VPLane(0)); Value *DerivedIV = emitTransformedIndex( - State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, + State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); DerivedIV->setName(Name); - + // Index may only bet set to constant 0 in prepareToExecute. + assert((DerivedIV != Index || cast(Index)->isNullValue()) && "IV didn't need transforming?"); State.set(this, DerivedIV, VPLane(0)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 18e5e2996c828..ae577429c51f8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -78,6 +78,8 @@ template struct specific_intval { if (!VPV->isLiveIn()) return false; Value *V = VPV->getLiveInIRValue(); + if (!V) + return false; const auto *CI = dyn_cast(V); if (!CI && V->getType()->isVectorTy()) if (const auto *C = dyn_cast(V)) @@ -136,7 +138,8 @@ struct MatchRecipeAndOpcode { // Check for recipes that do not have opcodes. if constexpr (std::is_same::value || std::is_same::value || - std::is_same::value) + std::is_same::value || + std::is_same::value) return DefR; else return DefR && DefR->getOpcode() == Opcode; @@ -382,6 +385,17 @@ inline VPScalarIVSteps_match m_ScalarIVSteps(const Op0_t &Op0, const Op1_t &Op1) { return VPScalarIVSteps_match(Op0, Op1); } + +template +using VPDerivedIV_match = + Recipe_match, 0, false, VPDerivedIVRecipe>; + +template +inline VPDerivedIV_match m_DerivedIV(const Op0_t &Op0, + const Op1_t &Op1, const Op2_t &Op2) { + return VPDerivedIV_match({Op0, Op1, Op2}); +} + } // namespace VPlanPatternMatch } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f7b9a676ae808..676661eb7cc88 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1060,6 +1060,9 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return R.getVPSingleValue()->replaceAllUsesWith(A); + + if ((match(&R, m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) || match(&R, m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue())) ) && TypeInfo.inferScalarType(R.getOperand(1)) == TypeInfo.inferScalarType(R.getVPSingleValue())) + return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1)); } /// Move loop-invariant recipes out of the vector loop region in \p Plan. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 7645e1094e9ff..4f84d1deff852 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -233,8 +233,8 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END4:%.*]] = add i64 [[START]], [[N_VEC3]] @@ -409,8 +409,8 @@ define void @test_widen_extended_induction(ptr %dst) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 4400b6ba568d2..80fd95de7a189 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -60,8 +60,8 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF3]] ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[IV_START]], [[N_VEC4]] @@ -171,11 +171,11 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2 -; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP11]], align 2 +; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP21]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP12]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD_3]], [[TMP1]] @@ -191,8 +191,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[L]], 8 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]] ; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 9adc64a6020e7..d377fac6be6aa 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -639,6 +639,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512: vector.ph: ; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16 ; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; AVX512-NEXT: [[TMP36:%.*]] = mul i64 [[N_VEC]], 4 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP36]] ; AVX512-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 64 ; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP13]] ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] @@ -673,8 +675,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; AVX512: vec.epilog.ph: -; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AVX512-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[TMP3]], 8 ; AVX512-NEXT: [[N_VEC10:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF9]] ; AVX512-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC10]], 4 @@ -691,12 +693,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP27]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]] ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD17]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]] ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr float, ptr [[TMP28]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD18:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]] ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20]], !noalias [[META22]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD18]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> splat (i1 true)), !alias.scope [[META20]], !noalias [[META22]] ; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8 ; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512 ; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 4496b8e12b1a7..38440f2e49514 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -72,9 +72,9 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF6:%.*]] = urem i64 [[TMP6]], 8 ; CHECK-NEXT: [[N_VEC7:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF6]] ; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[N_VEC7]], 2 @@ -117,10 +117,10 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK29:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK25:%.*]] ; CHECK: vector.main.loop.iter.check25: ; CHECK-NEXT: [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH47:%.*]], label [[VECTOR_PH30:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH42:%.*]], label [[VECTOR_PH30:%.*]] ; CHECK: vector.ph26: ; CHECK-NEXT: [[N_MOD_VF31:%.*]] = urem i64 [[TMP28]], 16 ; CHECK-NEXT: [[N_VEC32:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF31]] @@ -156,16 +156,18 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK20:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block20: ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK48:%.*]] +; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK43:%.*]] +; CHECK: vec.epilog.iter.check43: ; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH47]] -; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] -; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH42]] +; CHECK: vec.epilog.ph42: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] +; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] +; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] ; CHECK-NEXT: [[N_MOD_VF52:%.*]] = urem i64 [[TMP28]], 8 ; CHECK-NEXT: [[N_VEC53:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF52]] ; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2 @@ -181,9 +183,9 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY50:%.*]] ; CHECK: vec.epilog.vector.body50: -; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH47]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] -; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] -; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] +; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY50]] ] ; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] @@ -203,16 +205,16 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY58]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: vec.epilog.middle.block46: +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY50]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: vec.epilog.middle.block40: ; CHECK-NEXT: [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH46]] -; CHECK: vec.epilog.scalar.ph47: +; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH41]] +; CHECK: vec.epilog.scalar.ph41: ; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] ; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] ; CHECK: for.body.us: -; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH46]] ] +; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH41]] ] ; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL59]], [[VEC_EPILOG_SCALAR_PH46]] ] ; CHECK-NEXT: [[TMP56:%.*]] = sub nsw i64 8, [[INDVARS_IV78]] ; CHECK-NEXT: [[ADD_PTR_US:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV78]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index c342d2f81e979..f8a192f61bbf7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -35,10 +35,11 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VEC_TC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next.p, %loop.latch ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next.p, %loop.latch ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %iv.next = add i64 %iv, 1 ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index f07d1af47af02..d70c874499cb7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -19,6 +19,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%and> + vp<[[VTC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%A> + vp<[[VTC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -43,11 +45,13 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: IR %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME1]]> from scalar.ph) +; CHECK-NEXT: IR %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph) ; CHECK: IR %cmp = icmp eq i64 %iv.next, 0 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -69,8 +73,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %n.mod.vf = urem i64 %and, 16 ; CHECK-NEXT: IR %n.vec = sub i64 %and, %n.mod.vf -; CHECK-NEXT: IR %ind.end = sub i64 %and, %n.vec -; CHECK-NEXT: IR %ind.end1 = getelementptr i8, ptr %A, i64 %n.vec +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%and> + ir<[[VTC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%A> + ir<[[VTC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -103,8 +107,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi ir<%ind.end>, ir<%and> -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi ir<%ind.end1>, ir<%A> +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: From 071392e078f46c8f0cbff8338c2dece63314ae16 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 12 Dec 2024 22:03:56 +0000 Subject: [PATCH 05/17] !fixup address latest comment, thanks --- .../Transforms/Vectorize/LoopVectorize.cpp | 23 +++++++++---------- .../Transforms/Vectorize/VPlanPatternMatch.h | 4 ++-- .../Transforms/Vectorize/VPlanTransforms.cpp | 7 +++++- .../AArch64/sve-tail-folding-forced.ll | 1 + .../LoopVectorize/AArch64/sve-widen-gep.ll | 2 ++ .../AArch64/sve2-histcnt-vplan.ll | 6 +++-- .../AArch64/synthesize-mask-for-call.ll | 14 +++++++---- .../widen-call-with-intrinsic-or-libfunc.ll | 6 +++-- .../LoopVectorize/X86/conversion-cost.ll | 2 +- .../LoopVectorize/X86/float-induction-x86.ll | 4 ++-- .../LoopVectorize/X86/scatter_crash.ll | 14 +++++------ .../first-order-recurrence-chains-vplan.ll | 12 ++++++---- ...-order-recurrence-sink-replicate-region.ll | 19 ++++++++++----- .../interleave-and-scalarize-only.ll | 11 ++++++--- .../uncountable-early-exit-vplan.ll | 9 +++++--- .../LoopVectorize/vplan-predicate-switch.ll | 4 ++-- .../vplan-sink-scalars-and-merge-vf1.ll | 7 ++++-- .../vplan-sink-scalars-and-merge.ll | 9 ++++++-- 18 files changed, 99 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0d3129be97fc2..0b19015497105 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2643,14 +2643,12 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValue( B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); // Compute the end value for the additional bypass. - if (MainVectorTripCount) { - B.SetInsertPoint(getAdditionalBypassBlock(), - getAdditionalBypassBlock()->getFirstInsertionPt()); - EndValueFromAdditionalBypass = - emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step, - II.getKind(), II.getInductionBinOp()); - EndValueFromAdditionalBypass->setName("ind.end"); - } + B.SetInsertPoint(getAdditionalBypassBlock(), + getAdditionalBypassBlock()->getFirstInsertionPt()); + EndValueFromAdditionalBypass = + emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step, + II.getKind(), II.getInductionBinOp()); + EndValueFromAdditionalBypass->setName("ind.end"); } // Store the bypass value here, as it needs to be added as operand to its @@ -9722,11 +9720,12 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { Value *Step = State.get(getStepValue(), VPLane(0)); Value *Index = State.get(getOperand(1), VPLane(0)); Value *DerivedIV = emitTransformedIndex( - State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, - Kind, cast_if_present(FPBinOp)); + State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind, + cast_if_present(FPBinOp)); DerivedIV->setName(Name); - // Index may only bet set to constant 0 in prepareToExecute. - assert((DerivedIV != Index || cast(Index)->isNullValue()) && "IV didn't need transforming?"); + // Index may only be set to constant 0 in prepareToExecute. + assert((DerivedIV != Index || cast(Index)->isNullValue()) && + "IV didn't need transforming?"); State.set(this, DerivedIV, VPLane(0)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index ae577429c51f8..ec3c203a61b38 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -391,8 +391,8 @@ using VPDerivedIV_match = Recipe_match, 0, false, VPDerivedIVRecipe>; template -inline VPDerivedIV_match m_DerivedIV(const Op0_t &Op0, - const Op1_t &Op1, const Op2_t &Op2) { +inline VPDerivedIV_match +m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { return VPDerivedIV_match({Op0, Op1, Op2}); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 676661eb7cc88..7df4480d6ae85 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1061,7 +1061,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return R.getVPSingleValue()->replaceAllUsesWith(A); - if ((match(&R, m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) || match(&R, m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue())) ) && TypeInfo.inferScalarType(R.getOperand(1)) == TypeInfo.inferScalarType(R.getVPSingleValue())) + if ((match(&R, + m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) || + match(&R, + m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) && + TypeInfo.inferScalarType(R.getOperand(1)) == + TypeInfo.inferScalarType(R.getVPSingleValue())) return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1)); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index d81cfbf08ec93..a67d4f8a19ff7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -11,6 +11,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-LABEL: Checking a loop in 'simple_memset' ; VPLANS: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { ; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; VPLANS-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count ; VPLANS-EMPTY: ; VPLANS-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 1ee6083eb59a5..6bdc2094beb8b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -17,6 +17,8 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%start.1> + vp<[[VEC_TC]]> * ir<8> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%start.2> + vp<[[VEC_TC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index 3ef99ff496a68..de718af76ba41 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -45,10 +45,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi [[VTC]], ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %N ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -90,10 +91,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi [[VTC]], ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %iv.next, %N ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 8ac46fe7687d2..0c246c6ee93e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -43,10 +43,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -88,10 +89,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -138,6 +140,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -183,10 +186,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -232,6 +236,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -277,10 +282,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: IR %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index 648f6e874abbe..a119707bed120 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -41,10 +41,11 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %cmp = icmp ne i64 %iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -86,10 +87,11 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: IR %cmp = icmp ne i64 %iv.next, 1024 ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index a1cc59909adbe..f50dffb9ddf15 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -42,8 +42,8 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END4:%.*]] = add i64 3, [[N_VEC3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 12eb3bd03348a..b83f56cdc4bb0 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -61,8 +61,8 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY]], label [[VEC_EPILOG_PH]] ; AUTO_VEC: vec.epilog.ph: -; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[N_VEC3:%.*]] = and i64 [[ZEXT]], 2147483644 ; AUTO_VEC-NEXT: [[DOTCAST5:%.*]] = uitofp nneg i64 [[N_VEC3]] to float ; AUTO_VEC-NEXT: [[TMP7:%.*]] = fmul fast float [[DOTCAST5]], 5.000000e-01 @@ -441,8 +441,8 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[FOR_BODY]], label [[VEC_EPILOG_PH]] ; AUTO_VEC: vec.epilog.ph: -; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[N_VEC6:%.*]] = and i64 [[TMP0]], 4294967292 ; AUTO_VEC-NEXT: [[DOTCAST8:%.*]] = uitofp nneg i64 [[N_VEC6]] to float ; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul reassoc float [[DOTCAST8]], 4.200000e+01 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 38440f2e49514..bb8f43abc59e8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -117,7 +117,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK25:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH41:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK25:%.*]] ; CHECK: vector.main.loop.iter.check25: ; CHECK-NEXT: [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH42:%.*]], label [[VECTOR_PH30:%.*]] @@ -163,11 +163,11 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH42]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH41]], label [[VEC_EPILOG_PH42]] ; CHECK: vec.epilog.ph42: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] ; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] -; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] +; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] ; CHECK-NEXT: [[N_MOD_VF52:%.*]] = urem i64 [[TMP28]], 8 ; CHECK-NEXT: [[N_VEC53:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF52]] ; CHECK-NEXT: [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2 @@ -205,17 +205,17 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY50]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK40:%.*]], label [[VEC_EPILOG_VECTOR_BODY50]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: vec.epilog.middle.block40: ; CHECK-NEXT: [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH41]] ; CHECK: vec.epilog.scalar.ph41: -; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ] -; CHECK-NEXT: [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ] +; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[ITER_CHECK27]] ] +; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[ITER_CHECK27]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] ; CHECK: for.body.us: ; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH41]] ] -; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL59]], [[VEC_EPILOG_SCALAR_PH46]] ] +; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL67]], [[VEC_EPILOG_SCALAR_PH41]] ] ; CHECK-NEXT: [[TMP56:%.*]] = sub nsw i64 8, [[INDVARS_IV78]] ; CHECK-NEXT: [[ADD_PTR_US:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV78]] ; CHECK-NEXT: [[TMP57:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV70]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 0eab97b0cc735..f204e367352fd 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -45,12 +45,13 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> ; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -125,13 +126,14 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> ; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]>.1 = resume-phi vp<[[RESUME_2]]>.1, ir<33> ; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]>.2 = resume-phi vp<[[RESUME_3]]>.2, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK-NEXT: IR %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ] (extra operand: vp<[[RESUME_2_P]]>.1 from scalar.ph) ; CHECK-NEXT: IR %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ] (extra operand: vp<[[RESUME_3_P]]>.2 from scalar.ph) -; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %exitcond.not = icmp eq i64 %iv.next, 1000 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -205,12 +207,13 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]>.1 = resume-phi vp<[[EXT_Y]]>.1, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) ; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors @@ -282,12 +285,13 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_X:%.+]]> = resume-phi vp<[[EXT_X]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_Y:%.+]]>.1 = resume-phi vp<[[EXT_Y]]>.1, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK-NEXT: IR %for.x = phi i64 [ %for.x.next, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_X]]> from scalar.ph) ; CHECK-NEXT: IR %for.y = phi i32 [ %for.x.prev, %loop ], [ 0, %entry ] (extra operand: vp<[[RESUME_Y]]>.1 from scalar.ph) ; CHECK: No successors diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index d59573e7f4678..cb12864eb3d8f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -82,11 +82,12 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -172,11 +173,12 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -245,12 +247,13 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> ; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK-NEXT: IR %and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ] ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors @@ -361,11 +364,12 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %0 = phi i32 [ 0, %entry ], [ %conv, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %ec = icmp eq i32 %iv.next, 20001 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -458,11 +462,12 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[VEC_TC]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) -; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK: IR %C = icmp sgt i32 %iv.next, %recur.next ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -503,6 +508,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<2> + vp<[[VEC_TC]]> * ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -546,11 +552,12 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.*]]> = resume-phi vp<[[END]]>, ir<2> ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; CHECK-NEXT: IR %.pn = phi i32 [ 0, %entry ], [ %l, %loop ] (extra operand: vp<[[RESUME_1_P]]> from scalar.ph) ; CHECK: IR %ec = icmp ugt i64 %iv, 3 ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index bb17580ac4d11..5bc832fbd6842 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -14,6 +14,7 @@ ; DBG-NEXT: Successor(s): vector.ph ; DBG-EMPTY: ; DBG-NEXT: vector.ph: +; DBG-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VEC_TC]]> * ir<1> ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: vector loop: { @@ -76,6 +77,7 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): vector.ph ; DBG-EMPTY: ; DBG-NEXT: vector.ph: +; DBG-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir + vp<[[VEC_TC]]> * ir ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: vector loop: { @@ -116,11 +118,13 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): ir-bb, scalar.ph ; DBG-EMPTY: ; DBG-NEXT: scalar.ph: +; DBG-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[VEC_TC]]>, ir<0> +; DBG-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END]]>, ir ; DBG-NEXT: Successor(s): ir-bb ; DBG-EMPTY: ; DBG-NEXT: ir-bb: -; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] -; DBG-NEXT: IR %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ] +; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] (extra operand: vp<[[RESUME1]]> from scalar.ph) +; DBG-NEXT: IR %d = phi i1 [ false, %entry ], [ %d.next, %loop.latch ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph) ; DBG-NEXT: IR %d.next = xor i1 %d, true ; DBG-NEXT: No successors ; DBG-EMPTY: @@ -222,11 +226,12 @@ exit: ; DBG-NEXT: Successor(s): ir-bb, scalar.ph ; DBG-EMPTY: ; DBG-NEXT: scalar.ph: +; DBG-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; DBG-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; DBG-NEXT: Successor(s): ir-bb ; DBG-EMPTY: ; DBG-NEXT: ir-bb: -; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +; DBG-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME_IV]]> from scalar.ph) ; DBG-NEXT: IR %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] (extra operand: vp<[[RESUME_P]]> from scalar.ph) ; DBG: IR %ec = icmp slt i32 %iv.next.trunc, %n ; DBG-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll index 730dbfe84070a..0242b702ef23d 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -49,10 +49,11 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -133,10 +134,11 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values() { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -210,10 +212,11 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; CHECK-NEXT: ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] +; CHECK-NEXT: IR %iv = phi i64 [ %inc, %loop.latch ], [ 0, %entry ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index 484e1ea8de0d2..dd3b50b3e060c 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -14,7 +14,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, 2 ; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf -; CHECK-NEXT: IR %ind.end = getelementptr i8, ptr %start, i64 %n.vec +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + ir<%n.vec> * ir<1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -94,7 +94,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi ir<%ind.end>, ir<%start> +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[END]]>, ir<%start> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index a939b1e923a91..b6391e0457697 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -16,6 +16,7 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<0> + vp<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -53,11 +54,13 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[VEC_TC]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END]]>, ir<0> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ] -; CHECK-NEXT: IR %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ] +; CHECK-NEXT: IR %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ] (extra operand: vp<[[RESUME1]]> from scalar.ph) +; CHECK-NEXT: IR %tmp1 = phi i64 [ %tmp7, %for.inc ], [ 0, %entry ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph) ; CHECK: IR %tmp5 = trunc i32 %tmp4 to i8 ; CHECK-NEXT: No successors ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 94aefdee82b14..b4742a34fe886 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -262,6 +262,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<21> + vp<[[VEC_TC]]> * ir<1> ; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr inbounds ir<%A>, ir<0> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -1046,6 +1047,7 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%n> + vp<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -1086,10 +1088,11 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[END]]>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ] +; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1 ; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv ; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16 @@ -1134,6 +1137,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END:%.+]]> = DERIVED-IV ir<%start> + vp<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -1177,10 +1181,11 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[END]]>, ir<%start> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ] +; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ] (extra operand: vp<[[RESUME]]> from scalar.ph) ; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1 ; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1 ; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0 From b48b097d194e87f2542d99d1df76f63a3fdb7feb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 12 Dec 2024 22:25:04 +0000 Subject: [PATCH 06/17] !fixup update remaining tests --- .../LoopVectorize/AArch64/sve-widen-gep.ll | 7 ++-- .../RISCV/riscv-vector-reverse.ll | 34 +++++++++++-------- .../RISCV/vplan-vp-intrinsics-reduction.ll | 12 ++++--- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 6bdc2094beb8b..ba1a765790fbb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -57,11 +57,12 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 + ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -152,9 +153,9 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index eb60c24393df9..951d833fa941e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -64,6 +64,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + vp<[[VEC_TC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + vp<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -92,11 +94,13 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%0> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] -; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME1]]> from scalar.ph) +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph) ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -181,11 +185,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: IR %16 = mul i64 %15, 4 ; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, %16 ; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf -; CHECK-NEXT: IR %ind.end = sub i64 %0, %n.vec -; CHECK-NEXT: IR %.cast = trunc i64 %n.vec to i32 -; CHECK-NEXT: IR %ind.end3 = sub i32 %n, %.cast ; CHECK-NEXT: IR %17 = call i64 @llvm.vscale.i64() ; CHECK-NEXT: IR %18 = mul i64 %17, 4 +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + ir<[[VEC_TC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + ir<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -217,8 +220,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = resume-phi ir<%ind.end>, ir<%0> -; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = resume-phi ir<%ind.end3>, ir<%n> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = resume-phi vp<[[END1]]>, ir<%0> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -311,6 +314,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + vp<[[VEC_TC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + vp<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -339,11 +344,13 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%0> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] -; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME1]]> from scalar.ph) +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ] (extra operand: vp<[[RESUME2]]>.1 from scalar.ph) ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors ; CHECK-EMPTY: @@ -428,11 +435,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: IR %16 = mul i64 %15, 4 ; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, %16 ; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf -; CHECK-NEXT: IR %ind.end = sub i64 %0, %n.vec -; CHECK-NEXT: IR %.cast = trunc i64 %n.vec to i32 -; CHECK-NEXT: IR %ind.end3 = sub i32 %n, %.cast ; CHECK-NEXT: IR %17 = call i64 @llvm.vscale.i64() ; CHECK-NEXT: IR %18 = mul i64 %17, 4 +; CHECK-NEXT: vp<[[END1:%.+]]> = DERIVED-IV ir<%0> + ir<[[VEC_TC]]> * ir<-1> +; CHECK-NEXT: vp<[[END2:%.+]]> = DERIVED-IV ir<%n> + ir<[[VEC_TC]]> * ir<-1> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -464,8 +470,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi ir<%ind.end>, ir<%0> -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi ir<%ind.end3>, ir<%n> +; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%0> +; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index e7eb5778ffb93..cd1d734f00eaa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -63,11 +63,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: scalar.ph: +; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: ir-bb: -; IF-EVL-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; IF-EVL-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[IV_RESUME]]> from scalar.ph) ; IF-EVL-OUTLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] ; IF-EVL-OUTLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n ; IF-EVL-OUTLOOP-NEXT: No successors @@ -113,11 +114,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: +; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: ir-bb: -; IF-EVL-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; IF-EVL-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[IV_RESUME]]> from scalar.ph) ; IF-EVL-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] ; IF-EVL-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n ; IF-EVL-INLOOP-NEXT: No successors @@ -159,11 +161,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[IV_RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: ir-bb: -; NO-VP-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; NO-VP-OUTLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[IV_RESUME]]> from scalar.ph) ; NO-VP-OUTLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] ; NO-VP-OUTLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n ; NO-VP-OUTLOOP-NEXT: No successors @@ -205,11 +208,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb, scalar.ph ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: +; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0> ; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; NO-VP-INLOOP-NEXT: Successor(s): ir-bb ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: ir-bb: -; NO-VP-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] +; NO-VP-INLOOP-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[IV_RESUME]]> from scalar.ph) ; NO-VP-INLOOP-NEXT: IR %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ] ; NO-VP-INLOOP: IR %exitcond.not = icmp eq i64 %iv.next, %n ; NO-VP-INLOOP-NEXT: No successors From 5eb0cbce9b4861ca4e05464f7f12a06ffb9c2948 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 13 Dec 2024 14:10:59 +0000 Subject: [PATCH 07/17] !fixup update last remaining phaseordering test --- .../Transforms/PhaseOrdering/ARM/arm_mult_q15.ll | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll index ef651010d6a90..66a0771c8d373 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll @@ -15,7 +15,7 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS ; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER18:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER15:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8 ; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7 @@ -48,18 +48,18 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[BLOCKSIZE]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER18]] -; CHECK: while.body.preheader18: +; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER15]] +; CHECK: while.body.preheader15: ; CHECK-NEXT: [[BLKCNT_06_PH:%.*]] = phi i32 [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[PSRCA_ADDR_05_PH:%.*]] = phi ptr [ [[PSRCA]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[PDST_ADDR_04_PH:%.*]] = phi ptr [ [[PDST]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[PSRCB_ADDR_03_PH:%.*]] = phi ptr [ [[PSRCB]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: -; CHECK-NEXT: [[BLKCNT_06:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BLKCNT_06_PH]], [[WHILE_BODY_PREHEADER18]] ] -; CHECK-NEXT: [[PSRCA_ADDR_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRCA_ADDR_05_PH]], [[WHILE_BODY_PREHEADER18]] ] -; CHECK-NEXT: [[PDST_ADDR_04:%.*]] = phi ptr [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_04_PH]], [[WHILE_BODY_PREHEADER18]] ] -; CHECK-NEXT: [[PSRCB_ADDR_03:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_03_PH]], [[WHILE_BODY_PREHEADER18]] ] +; CHECK-NEXT: [[BLKCNT_06:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BLKCNT_06_PH]], [[WHILE_BODY_PREHEADER15]] ] +; CHECK-NEXT: [[PSRCA_ADDR_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRCA_ADDR_05_PH]], [[WHILE_BODY_PREHEADER15]] ] +; CHECK-NEXT: [[PDST_ADDR_04:%.*]] = phi ptr [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_04_PH]], [[WHILE_BODY_PREHEADER15]] ] +; CHECK-NEXT: [[PSRCB_ADDR_03:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_03_PH]], [[WHILE_BODY_PREHEADER15]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[PSRCA_ADDR_05]], i32 2 ; CHECK-NEXT: [[TMP10:%.*]] = load i16, ptr [[PSRCA_ADDR_05]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 From 8a1748a6e5f21251125cdf5aa99d20f57dec53ec Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 15 Dec 2024 14:44:04 +0000 Subject: [PATCH 08/17] !fixup use VPWidenInductionRecipe. --- .../Transforms/Vectorize/LoopVectorize.cpp | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7aa5a51c5cae6..a0c5489b2ccd9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8844,37 +8844,28 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) { - PHINode *OrigPhi; - const InductionDescriptor *ID; - VPValue *Start = PhiR->getStartValue(); - VPValue *Step; - Type *ScalarTy; + auto *WideIV = dyn_cast(PhiR); + if (!WideIV) + return nullptr; + + VPValue *Start = WideIV->getStartValue(); + VPValue *Step = WideIV->getStepValue(); + const InductionDescriptor &ID = WideIV->getInductionDescriptor(); + Type *ScalarTy = TypeInfo.inferScalarType(WideIV); bool IsCanonical = false; if (auto *WideIV = dyn_cast(PhiR)) { // Truncated wide inductions resume from the last lane of their vector value // in the last vector iteration. - if (WideIV->getTruncInst()) + if (ScalarTy != TypeInfo.inferScalarType(Step)) return nullptr; - OrigPhi = cast(WideIV->getUnderlyingValue()); - ID = &WideIV->getInductionDescriptor(); - Step = WideIV->getStepValue(); - ScalarTy = WideIV->getScalarType(); IsCanonical = WideIV->isCanonical(); - } else if (auto *WideIV = dyn_cast(PhiR)) { - OrigPhi = cast(WideIV->getUnderlyingValue()); - ID = &WideIV->getInductionDescriptor(); - Step = WideIV->getOperand(1); - ScalarTy = Start->getLiveInIRValue()->getType(); - } else { - return nullptr; } VPValue *EndValue = VectorTC; if (!IsCanonical) { EndValue = VectorPHBuilder.createDerivedIV( - ID->getKind(), - dyn_cast_or_null(ID->getInductionBinOp()), Start, - VectorTC, Step); + ID.getKind(), dyn_cast_or_null(ID.getInductionBinOp()), + Start, VectorTC, Step); } // EndValue is based on the vector trip count (which has the same type as the @@ -8886,7 +8877,7 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start}, - OrigPhi->getDebugLoc(), "bc.resume.val"); + WideIV->getDebugLoc(), "bc.resume.val"); return ResumePhiRecipe; } From bbdc0bbd017a9a75f9b155df8950665d2e9cff95 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 15 Dec 2024 14:45:48 +0000 Subject: [PATCH 09/17] [VPlan] Get types and step from VPWidenPointerInductionRecipe (NFC). --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 +++-- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 022c0f5a173a6..1a891e5b79feb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3194,7 +3194,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { // A pointer induction, performed by using a gep BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint(); Value *ScalarStepValue = State.get(getOperand(1), VPLane(0)); - Type *PhiType = IndDesc.getStep()->getType(); + Type *PhiType = State.TypeAnalysis.inferScalarType(getOperand(1)); Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); // Add induction update using an incorrect block temporarily. The phi node // will be fixed after VPlan execution. Note that at this point the latch @@ -3246,7 +3246,8 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, printAsOperand(O, SlotTracker); O << " = WIDEN-POINTER-INDUCTION "; getStartValue()->printAsOperand(O, SlotTracker); - O << ", " << *IndDesc.getStep(); + O << ", "; + getOperand(1)->printAsOperand(O, SlotTracker); if (getNumOperands() == 4) { O << ", "; getOperand(2)->printAsOperand(O, SlotTracker); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 1ee6083eb59a5..de1500421a915 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -22,7 +22,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1 +; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, ir<1> ; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8> ; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8> ; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]> From 89a1b3a915d8752c51be122e1ee1feedf5168722 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 15 Dec 2024 15:03:44 +0000 Subject: [PATCH 10/17] [VPlan] Add VPWidenInduction recipe as common base class (NFC). This helps to simplify some existing code and new code (https://github.com/llvm/llvm-project/pull/112145) --- .../Transforms/Vectorize/LoopVectorize.cpp | 12 +- llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlan.h | 110 ++++++++++-------- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 23 ++-- .../LoopVectorize/vplan-printing.ll | 4 +- 5 files changed, 78 insertions(+), 73 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d37e826412004..e8b1201eb2742 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7941,10 +7941,8 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton( SmallPtrSet WideIVs; for (VPRecipeBase &H : EPI.EpiloguePlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (auto *WideIV = dyn_cast(&H)) + if (auto *WideIV = dyn_cast(&H)) WideIVs.insert(WideIV->getPHINode()); - else if (auto *PtrIV = dyn_cast(&H)) - WideIVs.insert(cast(PtrIV->getUnderlyingValue())); } createInductionResumeVPValues(ExpandedSCEVs, nullptr, &WideIVs); @@ -10131,13 +10129,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, } else { // Retrieve the induction resume values for wide inductions from // their original phi nodes in the scalar loop. - PHINode *IndPhi = nullptr; - if (auto *Ind = dyn_cast(&R)) { - IndPhi = cast(Ind->getUnderlyingValue()); - } else { - auto *WidenInd = cast(&R); - IndPhi = WidenInd->getPHINode(); - } + PHINode *IndPhi = dyn_cast(&R)->getPHINode(); // Hook up to the PHINode generated by a ResumePhi recipe of main // loop VPlan, which feeds the scalar loop. ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index d3476399dabf1..432030a7b1adf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1047,7 +1047,7 @@ void VPlan::execute(VPTransformState *State) { if (isa(&R)) continue; - if (isa(&R)) { + if (isa(&R)) { PHINode *Phi = nullptr; if (isa(&R)) { Phi = cast(State->get(R.getVPSingleValue())); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e8902009455ef..12208a7968338 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2099,38 +2099,81 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe { } }; +/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and +/// VPWidenPointerInductionRecipe), providing shared functionality, including +/// retrieving the step value, induction descriptor and original phi node. +class VPWidenInductionRecipe : public VPHeaderPHIRecipe { + const InductionDescriptor &IndDesc; + +public: + VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, + VPValue *Step, const InductionDescriptor &IndDesc, + DebugLoc DL) + : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) { + addOperand(Step); + } + + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC || + R->getVPDefID() == VPDef::VPWidenPointerInductionSC; + } + + virtual void execute(VPTransformState &State) override = 0; + + /// Returns the step value of the induction. + VPValue *getStepValue() { return getOperand(1); } + const VPValue *getStepValue() const { return getOperand(1); } + + PHINode *getPHINode() const { return cast(getUnderlyingValue()); } + + /// Returns the induction descriptor for the recipe. + const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } + + VPValue *getBackedgeValue() override { + // TODO: All operands of base recipe must exist and be at same index in + // derived recipe. + llvm_unreachable( + "VPWidenIntOrFpInductionRecipe generates its own backedge value"); + } + + VPRecipeBase &getBackedgeRecipe() override { + // TODO: All operands of base recipe must exist and be at same index in + // derived recipe. + llvm_unreachable( + "VPWidenIntOrFpInductionRecipe generates its own backedge value"); + } +}; + /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector values. -class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { - PHINode *IV; +class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { TruncInst *Trunc; - const InductionDescriptor &IndDesc; public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL) - : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, DL), - IV(IV), Trunc(nullptr), IndDesc(IndDesc) { - addOperand(Step); + : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, + Step, IndDesc, DL), + Trunc(nullptr) { addOperand(VF); } VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL) - : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start, DL), - IV(IV), Trunc(Trunc), IndDesc(IndDesc) { - addOperand(Step); + : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start, + Step, IndDesc, DL), + Trunc(Trunc) { addOperand(VF); } ~VPWidenIntOrFpInductionRecipe() override = default; VPWidenIntOrFpInductionRecipe *clone() override { - return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(), - getStepValue(), getVFValue(), - IndDesc, Trunc, getDebugLoc()); + return new VPWidenIntOrFpInductionRecipe( + getPHINode(), getStartValue(), getStepValue(), getVFValue(), + getInductionDescriptor(), Trunc, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) @@ -2145,24 +2188,6 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { VPSlotTracker &SlotTracker) const override; #endif - VPValue *getBackedgeValue() override { - // TODO: All operands of base recipe must exist and be at same index in - // derived recipe. - llvm_unreachable( - "VPWidenIntOrFpInductionRecipe generates its own backedge value"); - } - - VPRecipeBase &getBackedgeRecipe() override { - // TODO: All operands of base recipe must exist and be at same index in - // derived recipe. - llvm_unreachable( - "VPWidenIntOrFpInductionRecipe generates its own backedge value"); - } - - /// Returns the step value of the induction. - VPValue *getStepValue() { return getOperand(1); } - const VPValue *getStepValue() const { return getOperand(1); } - VPValue *getVFValue() { return getOperand(2); } const VPValue *getVFValue() const { return getOperand(2); } @@ -2177,11 +2202,6 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { TruncInst *getTruncInst() { return Trunc; } const TruncInst *getTruncInst() const { return Trunc; } - PHINode *getPHINode() { return IV; } - - /// Returns the induction descriptor for the recipe. - const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } - /// Returns true if the induction is canonical, i.e. starting at 0 and /// incremented by UF * VF (= the original IV is incremented by 1) and has the /// same type as the canonical induction. @@ -2189,7 +2209,7 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { /// Returns the scalar type of the induction. Type *getScalarType() const { - return Trunc ? Trunc->getType() : IV->getType(); + return Trunc ? Trunc->getType() : getPHINode()->getType(); } /// Returns the VPValue representing the value of this induction at @@ -2200,10 +2220,8 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { } }; -class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, +class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe, public VPUnrollPartAccessor<3> { - const InductionDescriptor &IndDesc; - bool IsScalarAfterVectorization; public: @@ -2212,19 +2230,16 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL) - : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi, nullptr, DL), - IndDesc(IndDesc), - IsScalarAfterVectorization(IsScalarAfterVectorization) { - addOperand(Start); - addOperand(Step); - } + : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start, + Step, IndDesc, DL), + IsScalarAfterVectorization(IsScalarAfterVectorization) {} ~VPWidenPointerInductionRecipe() override = default; VPWidenPointerInductionRecipe *clone() override { return new VPWidenPointerInductionRecipe( cast(getUnderlyingInstr()), getOperand(0), getOperand(1), - IndDesc, IsScalarAfterVectorization, getDebugLoc()); + getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) @@ -2235,9 +2250,6 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, /// Returns true if only scalar values will be generated. bool onlyScalarsGenerated(bool IsScalable); - /// Returns the induction descriptor for the recipe. - const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } - /// Returns the VPValue representing the value of this induction at /// the first unrolled part, if it exists. Returns itself if unrolling did not /// take place. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1a891e5b79feb..b830e104539fd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1660,12 +1660,13 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { const InductionDescriptor &ID = getInductionDescriptor(); TruncInst *Trunc = getTruncInst(); IRBuilderBase &Builder = State.Builder; - assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); + assert(getPHINode()->getType() == ID.getStartValue()->getType() && + "Types must match"); assert(State.VF.isVector() && "must have vector VF"); // The value from the original loop to which we are mapping the new induction // variable. - Instruction *EntryVal = Trunc ? cast(Trunc) : IV; + Instruction *EntryVal = Trunc ? cast(Trunc) : getPHINode(); // Fast-math-flags propagate from the original induction instruction. IRBuilder<>::FastMathFlagGuard FMFG(Builder); @@ -1755,13 +1756,12 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-INDUCTION"; - if (getTruncInst()) { + if (auto *TI = getTruncInst()) { O << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; - O << " +\n" << Indent << "\" "; - getVPValue(0)->printAsOperand(O, SlotTracker); + O << " +\n" << Indent << "\" " << VPlanIngredient(getPHINode()) << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(TI); } else - O << " " << VPlanIngredient(IV); + O << " " << VPlanIngredient(getPHINode()); O << ", "; getStepValue()->printAsOperand(O, SlotTracker); @@ -3157,7 +3157,8 @@ bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { } void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { - assert(IndDesc.getKind() == InductionDescriptor::IK_PtrInduction && + assert(getInductionDescriptor().getKind() == + InductionDescriptor::IK_PtrInduction && "Not a pointer induction according to InductionDescriptor!"); assert(cast(getUnderlyingInstr())->getType()->isPointerTy() && "Unexpected type."); @@ -3193,8 +3194,8 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) { // A pointer induction, performed by using a gep BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint(); - Value *ScalarStepValue = State.get(getOperand(1), VPLane(0)); - Type *PhiType = State.TypeAnalysis.inferScalarType(getOperand(1)); + Value *ScalarStepValue = State.get(getStepValue(), VPLane(0)); + Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue()); Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF); // Add induction update using an incorrect block temporarily. The phi node // will be fixed after VPlan execution. Note that at this point the latch @@ -3247,7 +3248,7 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " = WIDEN-POINTER-INDUCTION "; getStartValue()->printAsOperand(O, SlotTracker); O << ", "; - getOperand(1)->printAsOperand(O, SlotTracker); + getStepValue()->printAsOperand(O, SlotTracker); if (getNumOperands() == 4) { O << ", "; getOperand(2)->printAsOperand(O, SlotTracker); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index c526c53dbea06..dccbed63a161b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -655,10 +655,10 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]> ; CHECK-NEXT: WIDEN-INDUCTION\l" + ; CHECK-NEXT: " %iv = phi %iv.next, 0\l" + -; CHECK-NEXT: " ir<%v2>, vp<[[EXP_SCEV]]>, vp<[[VF]]> +; CHECK-NEXT: " %v2 = trunc %iv, vp<[[EXP_SCEV]]>, vp<[[VF]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]> -; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%v2>, ir<1> +; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%iv>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%v3>, ir<%gep> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> From 378caa245c95572207183f4425215e4769284e16 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 19 Dec 2024 13:59:15 +0000 Subject: [PATCH 11/17] !fixup update tests after update to main --- .../AArch64/low_trip_count_predicates.ll | 2 ++ .../AArch64/sve-interleaved-accesses.ll | 6 +++--- .../LoopVectorize/PowerPC/exit-branch-cost.ll | 8 ++++++++ .../LoopVectorize/RISCV/induction-costs.ll | 4 +++- .../LoopVectorize/RISCV/strided-accesses.ll | 4 ++++ .../LoopVectorize/X86/induction-costs.ll | 5 +++++ llvm/test/Transforms/LoopVectorize/X86/pr72969.ll | 3 +++ .../Transforms/LoopVectorize/X86/scatter_crash.ll | 7 +++++++ .../Transforms/LoopVectorize/X86/small-size.ll | 6 ++++-- .../epilog-vectorization-any-of-reductions.ll | 5 +++++ .../first-order-recurrence-complex.ll | 4 ++++ llvm/test/Transforms/LoopVectorize/induction.ll | 15 +++++++++++++++ .../LoopVectorize/interleaved-accesses.ll | 3 +++ .../invariant-store-vectorization.ll | 2 ++ .../pr59319-loop-access-info-invalidation.ll | 3 +++ .../Transforms/LoopVectorize/reduction-align.ll | 8 +++++--- 16 files changed, 76 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 354a9fab63b53..f4fd3bc36c3f5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -128,6 +128,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS1-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS1: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-VS1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VS1-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS1: [[WHILE_BODY]]: ; CHECK-VS1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] @@ -236,6 +237,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-VS2-NEXT: br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-VS2: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-VS2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-VS2-NEXT: br label %[[WHILE_BODY:.*]] ; CHECK-VS2: [[WHILE_BODY]]: ; CHECK-VS2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index d91b467a44865..bf95622733461 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1492,9 +1492,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP33:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 7dabf315aba44..f00aef1ca4b6f 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -144,6 +144,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]] ; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL34:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX35:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL36:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], %[[ITER_CHECK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL34]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX35]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL36]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1 ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64 ; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index f5faa37874cbd..e4425a9327385 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -91,10 +91,12 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X_I64]], %[[VECTOR_MEMCHECK]] ], [ [[X_I64]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i32 [ [[IND_END22]], %[[MIDDLE_BLOCK]] ], [ [[X_I32]], %[[VECTOR_MEMCHECK]] ], [ [[X_I32]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_CONV:%.*]] = phi i32 [ [[BC_RESUME_VAL25]], %[[SCALAR_PH]] ], [ [[TMP64:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_CONV:%.*]] = phi i32 [ [[BC_RESUME_VAL13]], %[[SCALAR_PH]] ], [ [[TMP64:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_I64:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[TMP61:%.*]] = load i64, ptr [[GEP_I64]], align 8 ; CHECK-NEXT: [[TMP62:%.*]] = sext i32 [[IV_CONV]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index d45b0248ecd79..30cb33e64eccf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -346,6 +346,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: +; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] @@ -621,6 +623,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; NOSTRIDED: scalar.ph: +; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; NOSTRIDED-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: ; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 7c76bda2decbc..68cbfad91c541 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -347,6 +347,11 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[VECTOR_MEMCHECK]] ], [ 100, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[VECTOR_MEMCHECK]] ], [ [[A]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[VECTOR_MEMCHECK]] ], [ [[B]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[VECTOR_MEMCHECK]] ], [ [[B]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 09a97c57da012..39ea2f4ab405f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -86,6 +86,9 @@ define void @test(ptr %p) { ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: +; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ] +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY]] ] +; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; VEC-NEXT: br label [[FOR_BODY:%.*]] ; VEC: for.body: ; VEC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IDX:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 40c727b414492..d96eb081c2835 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -205,6 +205,13 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] +; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK40:%.*]], label [[VEC_EPILOG_VECTOR_BODY50]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: vec.epilog.middle.block40: +; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] +; CHECK-NEXT: br i1 [[CMP_N65]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH41]] +; CHECK: vec.epilog.scalar.ph41: +; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ 8, [[ITER_CHECK27]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK43]] ] +; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ 0, [[ITER_CHECK27]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK43]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] ; CHECK: for.body.us: ; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH41]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 34e9bb7bab649..c9132bab80f19 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -151,6 +151,8 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT20]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i64 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] +; CHECK: pred.store.if20: +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2048 x i32], ptr @b, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2048 x i32], ptr @c, i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 @@ -494,7 +496,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 -; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE15]] ; CHECK: pred.store.if13: ; CHECK-NEXT: [[TMP19:%.*]] = or disjoint i64 [[OFFSET_IDX5]], 12 ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]] @@ -504,7 +506,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32 ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7 ; CHECK-NEXT: store i32 [[TMP23]], ptr [[NEXT_GEP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]] ; CHECK: pred.store.continue14: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index a1851b54e635f..7d9eae864ff5b 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -274,6 +274,9 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ false, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -425,6 +428,8 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i1 [ [[RDX_SELECT22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ true, [[ITER_CHECK:%.*]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[RED:%.*]] = phi i1 [ [[BC_MERGE_RDX23]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 40fd264dbd3d2..253ecaca75be8 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -852,6 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -933,6 +935,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 080912ac2d5cd..2175eab9752cf 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -6236,6 +6236,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6304,6 +6307,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: +; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6376,6 +6382,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6454,6 +6463,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6526,6 +6538,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: +; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 2bf9b76e46b56..abd91d33157e6 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1508,6 +1508,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index bfd88c5677550..aef25a05ea124 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -401,6 +401,8 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[FOR_BODY3]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll index 33ca0ced3d9bd..a4b229d0a96b2 100644 --- a/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll @@ -91,6 +91,9 @@ define void @reduced(ptr %0, ptr %1, i64 %iv, ptr %2, i64 %iv76, i64 %iv93) { ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK21:%.*]], label [[VECTOR_BODY27]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block21: ; CHECK-NEXT: [[CMP_N27:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC25]] +; CHECK-NEXT: br i1 [[CMP_N27]], label [[LOOP_CLEANUP:%.*]], label [[SCALAR_PH22]] +; CHECK: scalar.ph22: +; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], [[MIDDLE_BLOCK21]] ], [ 0, [[VECTOR_MEMCHECK15]] ], [ 0, [[LOOP_3_LR_PH]] ] ; CHECK-NEXT: br label [[LOOP_3:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV846:%.*]] = phi i64 [ [[IV_NEXT85:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL13]], [[SCALAR_PH5]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-align.ll b/llvm/test/Transforms/LoopVectorize/reduction-align.ll index 34c6d1934eb30..3216c9233ea21 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-align.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-align.ll @@ -27,7 +27,7 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[REF]], align 1, !alias.scope !0 +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[REF]], align 1, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1]] = add <4 x i16> [[BROADCAST_SPLAT]], [[VEC_PHI]] @@ -36,10 +36,12 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) -; CHECK-NEXT: store i16 [[TMP3]], ptr [[HBUF]], align 1 +; CHECK-NEXT: store i16 [[TMP3]], ptr [[HBUF]], align 1, !alias.scope [[META6:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[HEIGHT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP4:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] @@ -49,7 +51,7 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) { ; CHECK-NEXT: store i16 [[ADD]], ptr [[HBUF]], align 1 ; CHECK-NEXT: [[INC]] = add i32 [[I]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[HEIGHT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: From a76343653b30c23521c95f1c98e241dd6d737d1c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 19 Dec 2024 14:55:32 +0000 Subject: [PATCH 12/17] !fixup address latest comments, thanks --- .../Vectorize/LoopVectorizationPlanner.h | 7 +- .../Transforms/Vectorize/LoopVectorize.cpp | 172 ++++++++---------- .../Transforms/Vectorize/VPlanTransforms.cpp | 1 + 3 files changed, 82 insertions(+), 98 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index c6a27549e1509..08adc74d700aa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -231,12 +231,15 @@ class VPBuilder { new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name)); } + /// Convert the input value \p Current to the corresponding value of an + /// induction with different start and step values, using Start + Current * + /// Step. VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, - VPValue *CanonicalIV, VPValue *Step, + VPValue *Current, VPValue *Step, const Twine &Name = "") { return tryInsertInstruction( - new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name)); + new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name)); } VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fb8d8507cfecb..5dd6fd348036f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -517,16 +517,6 @@ class InnerLoopVectorizer { /// Fix the non-induction PHIs in \p Plan. void fixNonInductionPHIs(VPTransformState &State); - /// Create and record the bypass resume value for an induction Phi coming from - /// the additional bypass block. \p Step is the SCEV-expanded induction step - /// to use. \p MainVectorTripCount provides the trip count of the main vector - /// loop, used to compute the resume value reaching the scalar loop preheader - /// directly from this additional bypass block. - void createInductionAdditionalBypassValue(PHINode *OrigPhi, - const InductionDescriptor &ID, - Value *Step, - Value *MainVectorTripCount); - /// Returns the original loop trip count. Value *getTripCount() const { return TripCount; } @@ -2628,37 +2618,6 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { nullptr, Twine(Prefix) + "scalar.ph"); } -void InnerLoopVectorizer::createInductionAdditionalBypassValue( - PHINode *OrigPhi, const InductionDescriptor &II, Value *Step, - Value *MainVectorTripCount) { - Instruction *OldInduction = Legal->getPrimaryInduction(); - // For the primary induction the additional bypass end value is known. - // Otherwise it is computed. - Value *EndValueFromAdditionalBypass = MainVectorTripCount; - if (OrigPhi != OldInduction) { - IRBuilder<> B(LoopVectorPreHeader->getTerminator()); - - // Fast-math-flags propagate from the original induction instruction. - if (isa_and_nonnull(II.getInductionBinOp())) - B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); - - // Compute the end value for the additional bypass. - B.SetInsertPoint(getAdditionalBypassBlock(), - getAdditionalBypassBlock()->getFirstInsertionPt()); - EndValueFromAdditionalBypass = - emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step, - II.getKind(), II.getInductionBinOp()); - EndValueFromAdditionalBypass->setName("ind.end"); - } - - // Store the bypass value here, as it needs to be added as operand to its - // scalar preheader phi node after the epilogue skeleton has been created. - // TODO: Directly add as extra operand to the VPResumePHI recipe. - assert(!Induction2AdditionalBypassValue.contains(OrigPhi) && - "entry for OrigPhi already exits"); - Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass; -} - /// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV /// expansion results. static Value *getExpandedStep(const InductionDescriptor &ID, @@ -2700,11 +2659,36 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues( const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount) { assert(MainVectorTripCount && "Must have bypass information"); + Instruction *OldInduction = Legal->getPrimaryInduction(); for (const auto &InductionEntry : Legal->getInductionVars()) { PHINode *OrigPhi = InductionEntry.first; const InductionDescriptor &II = InductionEntry.second; - createInductionAdditionalBypassValue( - OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), MainVectorTripCount); + Value *Step = getExpandedStep(II, ExpandedSCEVs); + // For the primary induction the additional bypass end value is known. + // Otherwise it is computed. + Value *EndValueFromAdditionalBypass = MainVectorTripCount; + if (OrigPhi != OldInduction) { + IRBuilder<> B(LoopVectorPreHeader->getTerminator()); + + // Fast-math-flags propagate from the original induction instruction. + if (isa_and_nonnull(II.getInductionBinOp())) + B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); + + // Compute the end value for the additional bypass. + B.SetInsertPoint(getAdditionalBypassBlock(), + getAdditionalBypassBlock()->getFirstInsertionPt()); + EndValueFromAdditionalBypass = + emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step, + II.getKind(), II.getInductionBinOp()); + EndValueFromAdditionalBypass->setName("ind.end"); + } + + // Store the bypass value here, as it needs to be added as operand to its + // scalar preheader phi node after the epilogue skeleton has been created. + // TODO: Directly add as extra operand to the VPResumePHI recipe. + assert(!Induction2AdditionalBypassValue.contains(OrigPhi) && + "entry for OrigPhi already exits"); + Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass; } } @@ -8995,14 +8979,14 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } -/// Create a ResumePhi for \p PhiR, if it is wide induction recipe. If the -/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute -/// the end value of the induction. -static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, - VPBuilder &VectorPHBuilder, - VPBuilder &ScalarPHBuilder, - VPTypeAnalysis &TypeInfo, - VPValue *VectorTC) { +/// Create and return a ResumePhi for \p PhiR, if it is wide induction recipe. +/// If the induction recipe is not canonical, creates a VPDerivedIVRecipe to +/// compute the end value of the induction. +static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR, + VPBuilder &VectorPHBuilder, + VPBuilder &ScalarPHBuilder, + VPTypeAnalysis &TypeInfo, + VPValue *VectorTC) { auto *WideIV = dyn_cast(PhiR); if (!WideIV) return nullptr; @@ -9010,14 +8994,14 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, VPValue *Start = WideIV->getStartValue(); VPValue *Step = WideIV->getStepValue(); const InductionDescriptor &ID = WideIV->getInductionDescriptor(); - Type *ScalarTy = TypeInfo.inferScalarType(WideIV); + Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV); bool IsCanonical = false; - if (auto *WideIV = dyn_cast(PhiR)) { + if (auto *WideIntOrFp = dyn_cast(PhiR)) { // Truncated wide inductions resume from the last lane of their vector value - // in the last vector iteration. - if (ScalarTy != TypeInfo.inferScalarType(Step)) + // in the last vector iteration which is handled elsewhere. + if (WideIntOrFp->getTruncInst()) return nullptr; - IsCanonical = WideIV->isCanonical(); + IsCanonical = WideIntOrFp->isCanonical(); } VPValue *EndValue = VectorTC; @@ -9027,11 +9011,11 @@ static VPValue *addResumeValuesForInduction(VPHeaderPHIRecipe *PhiR, Start, VectorTC, Step); } - // EndValue is based on the vector trip count (which has the same type as the - // widest induction) and thus may be wider than the induction here. - if (ScalarTy != TypeInfo.inferScalarType(EndValue)) { + // EndValue is derived from the vector trip count (which has the same type as + // the widest induction) and thus may be wider than the induction here. + if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) { EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue, - ScalarTy); + ScalarTypeOfWideIV); } auto *ResumePhiRecipe = @@ -9049,10 +9033,10 @@ static void addScalarResumePhis( VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); auto *ScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); - VPBuilder ScalarPHBuilder(ScalarPH); VPBuilder VectorPHBuilder( cast(Plan.getVectorLoopRegion()->getSinglePredecessor())); VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); + VPBuilder ScalarPHBuilder(ScalarPH); VPValue *OneVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) { @@ -9060,20 +9044,23 @@ static void addScalarResumePhis( auto *ScalarPhiI = dyn_cast(&ScalarPhiIRI->getInstruction()); if (!ScalarPhiI) break; - auto *VectorPhiR = GetHeaderPhiRecipe(ScalarPhiI); - if (VPValue *ResumePhi = addResumeValuesForInduction( - VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, - &Plan.getVectorTripCount())) { - ScalarPhiIRI->addOperand(ResumePhi); - continue; - } - if (!isa( - VectorPhiR)) { + auto *VectorPhiR = GetHeaderPhiRecipe(ScalarPhiI); + if (isa(VectorPhiR)) { + if (VPValue *ResumePhi = addResumePhiRecipeForInduction( + VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, + &Plan.getVectorTripCount())) { + ScalarPhiIRI->addOperand(ResumePhi); + continue; + } + // TODO: Also handle truncated inductions here. Computing end-values + // separately should be done as VPlan-to-VPlan optimization, after + // legalizing all resume values to use the last lane from the loop. assert(cast(VectorPhiR)->getTruncInst() && "should only skip truncated wide inductions"); continue; } + // The backedge value provides the value to resume coming out of a loop, // which for FORs is a vector whose last element needs to be extracted. The // start value provides the value if the loop is bypassed. @@ -9186,9 +9173,9 @@ addUsersInExitBlocks(VPlan &Plan, static void addExitUsersForFirstOrderRecurrences( VPlan &Plan, SetVector &ExitUsersToFix) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); - auto *ScalarPHVPBB = Plan.getScalarPreheader(); + auto *MainScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = Plan.getMiddleBlock(); - VPBuilder ScalarPHBuilder(ScalarPHVPBB); + VPBuilder ScalarPHBuilder(MainScalarPH); VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); VPValue *TwoVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2)); @@ -10201,31 +10188,27 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) !EnableLoopVectorization) {} /// Prepare \p MainPlan for vectorizing the main vector loop during epilogue -/// vectorization. Remove ResumePhis from \p MainPlan for inductions if they +/// vectorization. Remove ResumePhis from \p MainPlan for inductions that /// don't have a corresponding wide induction in \p EpiPlan. -static void preparePlanForMainVectorLoop( - VPlan &MainPlan, VPlan &EpiPlan, - const MapVector &Inductions) { - // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those - // will need their resume-values computed from the main vector loop. Others - // can be removed in the main VPlan. - SmallPtrSet WidenedPhis; +static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { + // Collect PHI nodes of widened phis in the VPlan for the epilogue. Those + // will need their resume-values computed in the main vector loop. Others + // can be removed from the main VPlan. + SmallPtrSet EpiWidenedPhis; for (VPRecipeBase &R : EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (!isa(&R)) + if (isa(&R)) continue; - if (isa(&R)) - WidenedPhis.insert(cast(&R)->getPHINode()); - else - WidenedPhis.insert( - cast(R.getVPSingleValue()->getUnderlyingValue())); + EpiWidenedPhis.insert( + cast(R.getVPSingleValue()->getUnderlyingValue())); } - for (VPRecipeBase &R : *cast(MainPlan.getScalarHeader())) { + for (VPRecipeBase &R : make_early_inc_range( + *cast(MainPlan.getScalarHeader()))) { auto *VPIRInst = cast(&R); auto *IRI = dyn_cast(&VPIRInst->getInstruction()); if (!IRI) break; - if (WidenedPhis.contains(IRI) || !Inductions.contains(IRI)) + if (EpiWidenedPhis.contains(IRI)) continue; // There is no corresponding wide induction in the epilogue plan that would // need a resume value. Set the operand in VPIRInst to zero, so ResumePhi @@ -10238,17 +10221,15 @@ static void preparePlanForMainVectorLoop( } using namespace VPlanPatternMatch; - VPBasicBlock *ScalarPHVPBB = MainPlan.getScalarPreheader(); + VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader(); VPValue *VectorTC = &MainPlan.getVectorTripCount(); // If there is no suitable resume value for the canonical induction in the - // epilogue loop, create it. - if (none_of(*ScalarPHVPBB, [VectorTC](VPRecipeBase &R) { + // scalar (which will become vector) epilogue loop, create it. + if (none_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) { return match(&R, m_VPInstruction( m_Specific(VectorTC), m_SpecificInt(0))); })) { - VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin()); - // When vectorizing the epilogue, create a resume phi for the - // canonical IV if no suitable resume phi was already created. + VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); ScalarPHBuilder.createNaryOp( VPInstruction::ResumePhi, {VectorTC, MainPlan.getOrAddLiveIn(ConstantInt::get( @@ -10721,8 +10702,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // to be vectorized by executing the plan (potentially with a different // factor) again shortly afterwards. VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width); - preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan, - LVL.getInductionVars()); + preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan); EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1, BestEpiPlan); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index f5084096eb991..bc9544a18fedb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1064,6 +1064,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (match(&R, m_Not(m_Not(m_VPValue(A))))) return R.getVPSingleValue()->replaceAllUsesWith(A); + // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0. if ((match(&R, m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) || match(&R, From e51c6fb9bc19c93043b87eb08c4b63c6093b636f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 20 Dec 2024 12:54:18 +0000 Subject: [PATCH 13/17] fixup address latest comments, thanks! --- .../Vectorize/LoopVectorizationPlanner.h | 4 +- .../Transforms/Vectorize/LoopVectorize.cpp | 103 ++++++++---------- .../AArch64/force-target-instruction-cost.ll | 6 +- .../LoopVectorize/AArch64/intrinsiccost.ll | 12 +- .../AArch64/low_trip_count_predicates.ll | 3 + .../LoopVectorize/PowerPC/exit-branch-cost.ll | 4 +- .../LoopVectorize/X86/gather_scatter.ll | 4 +- .../LoopVectorize/X86/intrinsiccost.ll | 12 +- .../LoopVectorize/X86/scatter_crash.ll | 4 +- 9 files changed, 73 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 08adc74d700aa..26a2de8c80977 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -232,8 +232,8 @@ class VPBuilder { } /// Convert the input value \p Current to the corresponding value of an - /// induction with different start and step values, using Start + Current * - /// Step. + /// induction with \p Start and \p Step values, using \p Start + \p Current * + /// \p Step. VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0392f06e82669..84c82d7443565 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2660,6 +2660,8 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues( assert(MainVectorTripCount && "Must have bypass information"); Instruction *OldInduction = Legal->getPrimaryInduction(); + IRBuilder<> BypassBuilder(getAdditionalBypassBlock(), + getAdditionalBypassBlock()->getFirstInsertionPt()); for (const auto &InductionEntry : Legal->getInductionVars()) { PHINode *OrigPhi = InductionEntry.first; const InductionDescriptor &II = InductionEntry.second; @@ -2668,18 +2670,15 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues( // Otherwise it is computed. Value *EndValueFromAdditionalBypass = MainVectorTripCount; if (OrigPhi != OldInduction) { - IRBuilder<> B(LoopVectorPreHeader->getTerminator()); - // Fast-math-flags propagate from the original induction instruction. if (isa_and_nonnull(II.getInductionBinOp())) - B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); + BypassBuilder.setFastMathFlags( + II.getInductionBinOp()->getFastMathFlags()); // Compute the end value for the additional bypass. - B.SetInsertPoint(getAdditionalBypassBlock(), - getAdditionalBypassBlock()->getFirstInsertionPt()); - EndValueFromAdditionalBypass = - emitTransformedIndex(B, MainVectorTripCount, II.getStartValue(), Step, - II.getKind(), II.getInductionBinOp()); + EndValueFromAdditionalBypass = emitTransformedIndex( + BypassBuilder, MainVectorTripCount, II.getStartValue(), Step, + II.getKind(), II.getInductionBinOp()); EndValueFromAdditionalBypass->setName("ind.end"); } @@ -8867,21 +8866,17 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR, if (!WideIV) return nullptr; + auto *WideIntOrFp = dyn_cast(WideIV); + // Truncated wide inductions resume from the last lane of their vector value + // in the last vector iteration which is handled elsewhere. + if (WideIntOrFp && WideIntOrFp->getTruncInst()) + return nullptr; + VPValue *Start = WideIV->getStartValue(); VPValue *Step = WideIV->getStepValue(); const InductionDescriptor &ID = WideIV->getInductionDescriptor(); - Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV); - bool IsCanonical = false; - if (auto *WideIntOrFp = dyn_cast(PhiR)) { - // Truncated wide inductions resume from the last lane of their vector value - // in the last vector iteration which is handled elsewhere. - if (WideIntOrFp->getTruncInst()) - return nullptr; - IsCanonical = WideIntOrFp->isCanonical(); - } - VPValue *EndValue = VectorTC; - if (!IsCanonical) { + if (!WideIntOrFp || !WideIntOrFp->isCanonical()) { EndValue = VectorPHBuilder.createDerivedIV( ID.getKind(), dyn_cast_or_null(ID.getInductionBinOp()), Start, VectorTC, Step); @@ -8889,6 +8884,7 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR, // EndValue is derived from the vector trip count (which has the same type as // the widest induction) and thus may be wider than the induction here. + Type *ScalarTypeOfWideIV = TypeInfo.inferScalarType(WideIV); if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) { EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue, ScalarTypeOfWideIV); @@ -8903,9 +8899,7 @@ static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR, /// Create resume phis in the scalar preheader for first-order recurrences, /// reductions and inductions, and update the VPIRInstructions wrapping the /// original phis in the scalar header. -static void addScalarResumePhis( - VPlan &Plan, - function_ref GetHeaderPhiRecipe) { +static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); auto *ScalarPH = Plan.getScalarPreheader(); auto *MiddleVPBB = cast(ScalarPH->getSinglePredecessor()); @@ -8921,7 +8915,7 @@ static void addScalarResumePhis( if (!ScalarPhiI) break; - auto *VectorPhiR = GetHeaderPhiRecipe(ScalarPhiI); + auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); if (isa(VectorPhiR)) { if (VPValue *ResumePhi = addResumePhiRecipeForInduction( VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, @@ -9049,9 +9043,9 @@ addUsersInExitBlocks(VPlan &Plan, static void addExitUsersForFirstOrderRecurrences( VPlan &Plan, SetVector &ExitUsersToFix) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); - auto *MainScalarPH = Plan.getScalarPreheader(); + auto *ScalarPHVPBB = Plan.getScalarPreheader(); auto *MiddleVPBB = Plan.getMiddleBlock(); - VPBuilder ScalarPHBuilder(MainScalarPH); + VPBuilder ScalarPHBuilder(ScalarPHVPBB); VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); VPValue *TwoVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2)); @@ -9317,9 +9311,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPlanTransforms::handleUncountableEarlyExit( *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder); } - addScalarResumePhis(*Plan, [&RecipeBuilder](PHINode *P) { - return cast(RecipeBuilder.getRecipe(P)); - }); + addScalarResumePhis(RecipeBuilder, *Plan); SetVector ExitUsersToFix = collectUsersInExitBlocks( OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); @@ -9441,18 +9433,16 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DebugLoc()); - addScalarResumePhis( - *Plan, - [&Plan](PHINode *P) { - return find_singleton( - Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis(), - [P](VPRecipeBase &R, bool) -> VPHeaderPHIRecipe * { - auto *HeaderR = cast(&R); - return HeaderR->getUnderlyingValue() == P ? HeaderR : nullptr; - }); - } - - ); + // Collect mapping of IR header phis to header phi recipes, to be used in + // addScalarResumePhis. + VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder); + for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { + if (isa(&R)) + continue; + auto *HeaderR = cast(&R); + RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); + } + addScalarResumePhis(RecipeBuilder, *Plan); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; @@ -9747,8 +9737,12 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); DerivedIV->setName(Name); - // Index may only be set to constant 0 in prepareToExecute. - assert((DerivedIV != Index || cast(Index)->isNullValue()) && + // If index is the vector trip count, the concrete value will only be set in + // prepareToExecute, leading to missed simplifications, e.g. if it is 0. + // TODO: Remove the special case for the vector trip count once it is computed + // in VPlan and can be used during VPlan simplification. + assert((DerivedIV != Index || + getOperand(1) == &getParent()->getPlan()->getVectorTripCount()) && "IV didn't need transforming?"); State.set(this, DerivedIV, VPLane(0)); } @@ -10074,8 +10068,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { EpiWidenedPhis.insert( cast(R.getVPSingleValue()->getUnderlyingValue())); } - for (VPRecipeBase &R : make_early_inc_range( - *cast(MainPlan.getScalarHeader()))) { + for (VPRecipeBase &R : *cast(MainPlan.getScalarHeader())) { auto *VPIRInst = cast(&R); auto *IRI = dyn_cast(&VPIRInst->getInstruction()); if (!IRI) @@ -10095,19 +10088,19 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { using namespace VPlanPatternMatch; VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader(); VPValue *VectorTC = &MainPlan.getVectorTripCount(); - // If there is no suitable resume value for the canonical induction in the - // scalar (which will become vector) epilogue loop, create it. - if (none_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) { + // If there is a suitable resume value for the canonical induction in the + // scalar (which will become vector) epilogue loop we are done. Otherwise + // create it below. + if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) { return match(&R, m_VPInstruction( m_Specific(VectorTC), m_SpecificInt(0))); - })) { - VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); - ScalarPHBuilder.createNaryOp( - VPInstruction::ResumePhi, - {VectorTC, MainPlan.getOrAddLiveIn(ConstantInt::get( - MainPlan.getCanonicalIV()->getScalarType(), 0))}, - {}, "vec.epilog.resume.val"); - } + })) + return; + VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); + ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, + {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, + "vec.epilog.resume.val"); } /// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index cbf9bf08c2a20..08a6001431903 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -91,8 +91,8 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] ; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] +; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] @@ -117,11 +117,11 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; CHECK-NEXT: br i1 [[CMP_N11]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], %[[ITER_CHECK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ], [ [[IND_END6]], %[[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ], [ [[IND_END2]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL9]], %[[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index 393ee8d30433b..9dceb0167a4ac 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -50,12 +50,12 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc nuw i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END8:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]] ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP6]] +; CHECK-NEXT: [[IND_END10:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_END10:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP7]] -; CHECK-NEXT: [[DOTCAST7:%.*]] = trunc nuw i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END8:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST7]] +; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP7]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 12 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] @@ -171,10 +171,10 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] ; CHECK-NEXT: [[DOTCAST6:%.*]] = trunc nuw i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END7:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST6]] +; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 24 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index f4fd3bc36c3f5..0b2667a90c2e2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -35,6 +35,9 @@ target triple = "aarch64-unknown-linux-gnu" ; DEBUG: LV: can fold tail by masking. ; DEBUG: Executing best plan with VF=vscale x 16, UF=1 +; TODO: Clean up unused add instruction computing the end value of the +; induction, which gets created during execution of the main plan once +; VPlan's scope includes both vector loops. define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef %val) { ; CHECK-VS1-LABEL: define void @low_vf_ic_is_better( ; CHECK-VS1-SAME: ptr nocapture noundef [[P:%.*]], i32 [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index f00aef1ca4b6f..2f1af7951dbc2 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -p loop-vectorize -S %s | FileCheck %s target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512" @@ -192,8 +192,6 @@ exit: ret i1 %res } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 0c6b7fdb1546e..4e8149568ae9c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -667,10 +667,10 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; AVX512: vec.epilog.iter.check: -; AVX512-NEXT: [[TMP22:%.*]] = mul i64 [[N_VEC]], 64 -; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP22]] ; AVX512-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 4 ; AVX512-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP23]] +; AVX512-NEXT: [[TMP38:%.*]] = mul i64 [[N_VEC]], 64 +; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP38]] ; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index b2772648b5ee1..f50177e61ef08 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -56,12 +56,12 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc nuw i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]] ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP12]] +; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP13]] -; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc nuw i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST9]] +; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP13]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 56 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] @@ -181,10 +181,10 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] ; CHECK-NEXT: [[DOTCAST8:%.*]] = trunc nuw i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END9:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST8]] +; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP0]], 120 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index d96eb081c2835..c14c34cade6b6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -65,9 +65,9 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT99:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END12:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END9:%.*]] = add i64 8, [[TMP64]] +; CHECK-NEXT: [[IND_END12:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP6]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] @@ -158,9 +158,9 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK43:%.*]] ; CHECK: vec.epilog.iter.check43: -; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] +; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH41]], label [[VEC_EPILOG_PH42]] From 7897ab7462b5c026f66567adf84bbc728d8426ac Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 23 Dec 2024 12:56:19 +0000 Subject: [PATCH 14/17] !fixup address latest comments, thanks! --- .../Transforms/Vectorize/LoopVectorize.cpp | 20 +++++++++---------- .../AArch64/low_trip_count_predicates.ll | 5 ----- .../AArch64/sve-tail-folding-forced.ll | 3 +-- .../LoopVectorize/X86/gather_scatter.ll | 2 -- .../epilog-vectorization-any-of-reductions.ll | 2 -- .../optimal-epilog-vectorization.ll | 1 - 6 files changed, 10 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 84c82d7443565..a92bde13ff66c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2670,15 +2670,16 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues( // Otherwise it is computed. Value *EndValueFromAdditionalBypass = MainVectorTripCount; if (OrigPhi != OldInduction) { + auto *BinOp = II.getInductionBinOp(); // Fast-math-flags propagate from the original induction instruction. - if (isa_and_nonnull(II.getInductionBinOp())) + if (isa_and_nonnull(BinOp)) BypassBuilder.setFastMathFlags( II.getInductionBinOp()->getFastMathFlags()); // Compute the end value for the additional bypass. - EndValueFromAdditionalBypass = emitTransformedIndex( - BypassBuilder, MainVectorTripCount, II.getStartValue(), Step, - II.getKind(), II.getInductionBinOp()); + EndValueFromAdditionalBypass = + emitTransformedIndex(BypassBuilder, MainVectorTripCount, + II.getStartValue(), Step, II.getKind(), BinOp); EndValueFromAdditionalBypass->setName("ind.end"); } @@ -8857,15 +8858,11 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, /// Create and return a ResumePhi for \p PhiR, if it is wide induction recipe. /// If the induction recipe is not canonical, creates a VPDerivedIVRecipe to /// compute the end value of the induction. -static VPValue *addResumePhiRecipeForInduction(VPHeaderPHIRecipe *PhiR, +static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) { - auto *WideIV = dyn_cast(PhiR); - if (!WideIV) - return nullptr; - auto *WideIntOrFp = dyn_cast(WideIV); // Truncated wide inductions resume from the last lane of their vector value // in the last vector iteration which is handled elsewhere. @@ -8916,9 +8913,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) { break; auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); - if (isa(VectorPhiR)) { + if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPValue *ResumePhi = addResumePhiRecipeForInduction( - VectorPhiR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, + WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, &Plan.getVectorTripCount())) { ScalarPhiIRI->addOperand(ResumePhi); continue; @@ -10084,6 +10081,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { 0, MainPlan.getOrAddLiveIn(Constant::getNullValue(IRI->getType()))); ResumePhi->eraseFromParent(); } + VPlanTransforms::removeDeadRecipes(MainPlan); using namespace VPlanPatternMatch; VPBasicBlock *MainScalarPH = MainPlan.getScalarPreheader(); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index 0b2667a90c2e2..6d57f212fd88c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -35,9 +35,6 @@ target triple = "aarch64-unknown-linux-gnu" ; DEBUG: LV: can fold tail by masking. ; DEBUG: Executing best plan with VF=vscale x 16, UF=1 -; TODO: Clean up unused add instruction computing the end value of the -; induction, which gets created during execution of the main plan once -; VPlan's scope includes both vector loops. define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef %val) { ; CHECK-VS1-LABEL: define void @low_vf_ic_is_better( ; CHECK-VS1-SAME: ptr nocapture noundef [[P:%.*]], i32 [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { @@ -77,7 +74,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS1-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS1-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16 -; CHECK-VS1-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-VS1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -186,7 +182,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VS2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-VS2-NEXT: [[TMP40:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-VS2-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index a67d4f8a19ff7..1f7d0b745f929 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -11,8 +11,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-LABEL: Checking a loop in 'simple_memset' ; VPLANS: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { ; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; VPLANS-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count +; VPLANS: vp<[[TC:%[0-9]+]]> = original trip-count ; VPLANS-EMPTY: ; VPLANS-NEXT: ir-bb: ; VPLANS-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n) diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 4e8149568ae9c..8c338d6a746c4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -639,8 +639,6 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512: vector.ph: ; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16 ; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] -; AVX512-NEXT: [[TMP36:%.*]] = mul i64 [[N_VEC]], 4 -; AVX512-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP36]] ; AVX512-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 64 ; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP13]] ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 7d9eae864ff5b..94593a7d9a81d 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -331,8 +331,6 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP50]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index f223414dfe48e..1bfb34165e52e 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -175,7 +175,6 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] From 6c8def548347442e1d31daedb2a0e79c929ab9b9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 23 Dec 2024 19:31:04 +0000 Subject: [PATCH 15/17] !fixup address comments, thanks! --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a92bde13ff66c..65d8f6fafa883 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2674,7 +2674,7 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues( // Fast-math-flags propagate from the original induction instruction. if (isa_and_nonnull(BinOp)) BypassBuilder.setFastMathFlags( - II.getInductionBinOp()->getFastMathFlags()); + BinOp->getFastMathFlags()); // Compute the end value for the additional bypass. EndValueFromAdditionalBypass = @@ -8855,9 +8855,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } -/// Create and return a ResumePhi for \p PhiR, if it is wide induction recipe. -/// If the induction recipe is not canonical, creates a VPDerivedIVRecipe to -/// compute the end value of the induction. +/// Create and return a ResumePhi for \p WideIF, unless it is truncated. If the +/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute +/// the end value of the induction. static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, VPBuilder &ScalarPHBuilder, From 1e44bdd754e788eb428b307c72760e79ea313ce8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 23 Dec 2024 19:43:33 +0000 Subject: [PATCH 16/17] !fixup fix formatting --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 65d8f6fafa883..01d30807fa858 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2673,8 +2673,7 @@ void InnerLoopVectorizer::createInductionAdditionalBypassValues( auto *BinOp = II.getInductionBinOp(); // Fast-math-flags propagate from the original induction instruction. if (isa_and_nonnull(BinOp)) - BypassBuilder.setFastMathFlags( - BinOp->getFastMathFlags()); + BypassBuilder.setFastMathFlags(BinOp->getFastMathFlags()); // Compute the end value for the additional bypass. EndValueFromAdditionalBypass = From 6e860aa26058dc53f9d71b3fad578235c1539fcb Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 29 Dec 2024 18:05:00 +0000 Subject: [PATCH 17/17] !fixup address remaining comments, thanks! --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b5b07e1cacc44..1975df3cacbca 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8854,7 +8854,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); } -/// Create and return a ResumePhi for \p WideIF, unless it is truncated. If the +/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute /// the end value of the induction. static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV, @@ -10064,7 +10064,8 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { EpiWidenedPhis.insert( cast(R.getVPSingleValue()->getUnderlyingValue())); } - for (VPRecipeBase &R : *cast(MainPlan.getScalarHeader())) { + for (VPRecipeBase &R : make_early_inc_range( + *cast(MainPlan.getScalarHeader()))) { auto *VPIRInst = cast(&R); auto *IRI = dyn_cast(&VPIRInst->getInstruction()); if (!IRI) @@ -10072,12 +10073,11 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { if (EpiWidenedPhis.contains(IRI)) continue; // There is no corresponding wide induction in the epilogue plan that would - // need a resume value. Set the operand in VPIRInst to zero, so ResumePhi - // can be removed. The resume values for the scalar loop will be created - // during execution of EpiPlan. + // need a resume value. Remove the VPIRInst wrapping the scalar header phi + // together with the corresponding ResumePhi. The resume values for the + // scalar loop will be created during execution of EpiPlan. VPRecipeBase *ResumePhi = VPIRInst->getOperand(0)->getDefiningRecipe(); - VPIRInst->setOperand( - 0, MainPlan.getOrAddLiveIn(Constant::getNullValue(IRI->getType()))); + VPIRInst->eraseFromParent(); ResumePhi->eraseFromParent(); } VPlanTransforms::removeDeadRecipes(MainPlan);