diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d7a0a29d6ed55..7475a70abb60c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8620,9 +8620,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
 
   // Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
   // IV by VF * UF.
-  auto *CanonicalIVIncrement =
-      new VPInstruction(VPInstruction::CanonicalIVIncrement, {CanonicalIVPHI},
-                        {HasNUW, false}, DL, "index.next");
+  auto *CanonicalIVIncrement = new VPInstruction(
+      VPInstruction::AddPart0, {CanonicalIVPHI, &Plan.getRuntimeVFxUF()},
+      {HasNUW, false}, DL, "index.next");
   CanonicalIVPHI->addOperand(CanonicalIVIncrement);
 
   VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0074532f9c68d..0274484355c99 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -741,6 +741,12 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
     State.set(&VectorTripCount, VectorTripCountV, Part);
 
+  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
+  // FIXME: Model runtime VF * UF computation completely in VPlan.
+  State.set(&RuntimeVFxUF,
+            createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
+            0);
+
   // When vectorizing the epilogue loop, the canonical induction start value
   // needs to be changed from zero to the value after the main vector loop.
   // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
@@ -752,7 +758,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
                     return isa(U) || isa(U) ||
                            cast(U)->getOpcode() ==
-                               VPInstruction::CanonicalIVIncrement;
+                               VPInstruction::AddPart0;
                   }) &&
          "the canonical IV should only be used by its increment or "
          "ScalarIVSteps when resetting the start value");
@@ -845,6 +851,13 @@ void VPlan::execute(VPTransformState *State) {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPlan::printLiveIns(raw_ostream &O) const {
   VPSlotTracker SlotTracker(this);
+
+  if (RuntimeVFxUF.getNumUsers() > 0) {
+    O << "\nLive-in ";
+    RuntimeVFxUF.printAsOperand(O, SlotTracker);
+    O << " = runtime VF * UF";
+  }
+
   if (VectorTripCount.getNumUsers() > 0) {
     O << "\nLive-in ";
     VectorTripCount.printAsOperand(O, SlotTracker);
@@ -1237,6 +1250,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
 }
 
 void VPSlotTracker::assignSlots(const VPlan &Plan) {
+  if (Plan.RuntimeVFxUF.getNumUsers() > 0)
+    assignSlot(&Plan.RuntimeVFxUF);
   assignSlot(&Plan.VectorTripCount);
   if (Plan.BackedgeTakenCount)
     assignSlot(Plan.BackedgeTakenCount);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 639c2ac685a66..24a9872053a31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1029,9 +1029,8 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
     SLPStore,
     ActiveLaneMask,
     CalculateTripCountMinusVF,
-    CanonicalIVIncrement,
-    // The next op is similar to the above, but instead increment the
-    // canonical IV separately for each unrolled part.
+    AddPart0,
+    // Increment the canonical IV separately for each unrolled part.
     CanonicalIVIncrementForPart,
     BranchOnCount,
     BranchOnCond
@@ -1139,7 +1138,7 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
       return false;
     case VPInstruction::ActiveLaneMask:
     case VPInstruction::CalculateTripCountMinusVF:
-    case VPInstruction::CanonicalIVIncrement:
+    case VPInstruction::AddPart0:
     case VPInstruction::CanonicalIVIncrementForPart:
     case VPInstruction::BranchOnCount:
       return true;
@@ -2512,6 +2511,9 @@ class VPlan {
   /// Represents the vector trip count.
   VPValue VectorTripCount;
 
+  /// Represents the loop-invariant runtime VF * UF of the vector loop region.
+  VPValue RuntimeVFxUF;
+
   /// Holds a mapping between Values and their corresponding VPValue inside
   /// VPlan.
   Value2VPValueTy Value2VPValue;
@@ -2591,6 +2593,9 @@ class VPlan {
   /// The vector trip count.
   VPValue &getVectorTripCount() { return VectorTripCount; }
 
+  /// Returns runtime VF * UF for the vector loop region.
+  VPValue &getRuntimeVFxUF() { return RuntimeVFxUF; }
+
   /// Mark the plan to indicate that using Value2VPValue is not safe any
   /// longer, because it may be stale.
   void disableValue2VPValue() { Value2VPValueEnabled = false; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f73487ffb6a7e..bb029caa7b353 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -120,7 +120,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
     case Instruction::ICmp:
     case VPInstruction::Not:
     case VPInstruction::CalculateTripCountMinusVF:
-    case VPInstruction::CanonicalIVIncrement:
+    case VPInstruction::AddPart0:
     case VPInstruction::CanonicalIVIncrementForPart:
       return false;
     default:
@@ -335,13 +335,10 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
     Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
     return Builder.CreateSelect(Cmp, Sub, Zero);
   }
-  case VPInstruction::CanonicalIVIncrement: {
+  case VPInstruction::AddPart0: {
     if (Part == 0) {
       auto *Phi = State.get(getOperand(0), 0);
-      // The loop step is equal to the vectorization factor (num of SIMD
-      // elements) times the unroll factor (num of SIMD instructions).
-      Value *Step =
-          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
+      Value *Step = State.get(getOperand(1), 0);
       return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
                                hasNoSignedWrap());
     }
@@ -468,8 +465,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::FirstOrderRecurrenceSplice:
     O << "first-order splice";
     break;
-  case VPInstruction::CanonicalIVIncrement:
-    O << "VF * UF +";
+  case VPInstruction::AddPart0:
+    O << "add (part 0)";
     break;
   case VPInstruction::BranchOnCond:
     O << "branch-on-cond";
     break;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 6b5d69d100dde..8c50d86489c9d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -18,16 +18,16 @@ define void @f1(ptr %A) #0 {
 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 98d55ae15c077..763b3e0bc8293 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -54,6 +54,8 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
 ; SVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
 ; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
 ; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
 ; SVE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SVE: vector.body:
 ; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -66,8 +68,6 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], [[TMP7]] ; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP8]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; SVE-NEXT: [[TMP9]] = fadd [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 28962dfba8924..144b29d84198a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -19,6 +19,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -27,8 +29,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP5:%.*]] = call @foo_vector( [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -60,6 +60,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] ; TFCOMMON: vector.body: @@ -70,8 +72,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) ; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( 
insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -111,6 +111,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -122,8 +124,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], zeroinitializer, [[TMP6]] ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 -; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -161,6 +161,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] ; TFCOMMON: vector.body: @@ -177,8 +179,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[TMP12:%.*]] = or [[TMP7]], [[TMP10]] ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP11]], i32 8, [[TMP12]]) -; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFCOMMON-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -229,6 +229,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -241,8 +243,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP7]], [[TMP8]] ; TFNONE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[PREDPHI]], ptr [[TMP9]], align 8 -; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP11:%.*]] = mul 
i64 [[TMP10]], 2 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -283,6 +283,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]] ; TFCOMMON: vector.body: @@ -300,8 +302,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] ; TFCOMMON-NEXT: [[TMP13:%.*]] = or [[TMP8]], [[TMP10]] ; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP12]], i32 8, [[TMP13]]) -; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFCOMMON-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -355,6 +355,8 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -363,8 +365,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP5:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -413,6 +413,8 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFFALLBACK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -421,8 +423,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP5:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) ; 
TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFFALLBACK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -475,6 +475,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -483,8 +485,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP5:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) ; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP5]], ptr [[TMP6]], align 8 -; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -516,6 +516,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]] ; TFALWAYS: vector.body: @@ -526,8 +528,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFALWAYS-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -546,6 +546,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFFALLBACK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TFFALLBACK-NEXT: 
[[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: @@ -556,8 +558,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP6]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFFALLBACK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFFALLBACK-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -599,6 +599,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M:%.*]], i64 0 ; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -613,8 +615,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFNONE-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 ; TFNONE-NEXT: [[TMP9]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP5]]) -; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -651,6 +651,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M:%.*]], i64 0 ; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -668,8 +670,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) ; TFALWAYS-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) -; 
TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] ; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFALWAYS-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -688,6 +688,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double [[M:%.*]], i64 0 ; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -705,8 +707,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub ; TFFALLBACK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) ; TFFALLBACK-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], [[TMP10]]) -; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] ; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFFALLBACK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll index ca3c935669e97..59a1e108b92f0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll @@ -18,6 +18,8 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -49,8 +51,6 @@ define void @foo() { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[TMP16]], shufflevector ( insertelement ( poison, i64 1024, i64 0), poison, zeroinitializer) -; CHECK-NEXT: 
[[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 3f6d1b70f891c..9bb9417398526 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -17,28 +17,28 @@ define void @test_invar_gep(ptr %dst) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 100, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 100, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = add zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP7]], i32 [[TMP15]] -; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP7:%.*]] = add zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = add [[DOTSPLAT]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP9]], i32 [[TMP17]] +; CHECK-NEXT: store i64 [[TMP18]], ptr [[TMP14]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index db97dc6796738..a3c108bca140b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -24,33 +24,33 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]] +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[IDX]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = mul i32 1, [[TMP10]] -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv2i32() +; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = mul [[TMP9]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add [[DOTSPLAT]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]] +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IDX]], [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[A:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP13]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[A:%.*]], [[VEC_IND]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP17]], align 8 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP14]] +; 
CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP19]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -58,7 +58,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 2 ; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP13]], i32 [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP15]], i32 [[TMP23]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index 52feff81914a3..43220d582c4c8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -14,24 +14,24 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = select fast [[TMP7]], [[WIDE_MASKED_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP10]]) -; CHECK-NEXT: [[TMP12]] = fadd fast float [[TMP11]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], 
align 4 +; CHECK-NEXT: [[TMP9:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP9]], [[WIDE_MASKED_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP12]]) +; CHECK-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -39,7 +39,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], [[FOR_INC:%.*]] ] @@ -57,9 +57,9 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture ; CHECK-NEXT: [[RES]] = phi float [ [[FADD]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RES_LCSSA]] ; entry: @@ -101,25 +101,25 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) -; 
CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = select fast [[TMP7]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0x7FF0000000000000, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP10]]) -; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP11]], [[VEC_PHI]] -; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP11]], float [[VEC_PHI]] -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = fcmp une [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP10]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP11]], i32 4, [[TMP9]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP9]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0x7FF0000000000000, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP12]]) +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index f045f486eac27..fbb2b177a16f9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -42,6 +42,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -51,8 +53,6 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-UNORDERED-NEXT: 
[[TMP7]] = fadd [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -89,6 +89,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -98,8 +100,6 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; CHECK-ORDERED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-ORDERED-NEXT: [[TMP7]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-ORDERED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -136,6 +136,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -153,8 +155,6 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -230,6 +230,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; 
CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -275,8 +277,6 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP35]] = fadd [[WIDE_LOAD4]], [[VEC_PHI1]] ; CHECK-UNORDERED-NEXT: [[TMP36]] = fadd [[WIDE_LOAD5]], [[VEC_PHI2]] ; CHECK-UNORDERED-NEXT: [[TMP37]] = fadd [[WIDE_LOAD6]], [[VEC_PHI3]] -; CHECK-UNORDERED-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 32 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP39]] ; CHECK-UNORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -316,6 +316,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -358,8 +360,6 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP35:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP34]], [[WIDE_LOAD1]]) ; CHECK-ORDERED-NEXT: [[TMP36:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP35]], [[WIDE_LOAD2]]) ; CHECK-ORDERED-NEXT: [[TMP37]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP36]], [[WIDE_LOAD3]]) -; CHECK-ORDERED-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 32 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP39]] ; CHECK-ORDERED-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -396,6 +396,8 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -479,8 +481,6 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP66:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP64]], [[TMP65]]) ; 
CHECK-ORDERED-TF-NEXT: [[TMP67:%.*]] = select [[ACTIVE_LANE_MASK8]], [[WIDE_MASKED_LOAD11]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP68]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP66]], [[TMP67]]) -; CHECK-ORDERED-TF-NEXT: [[TMP69:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP70:%.*]] = mul i64 [[TMP69]], 32 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP70]] ; CHECK-ORDERED-TF-NEXT: [[TMP71:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP72:%.*]] = mul i64 [[TMP71]], 8 @@ -589,6 +589,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-UNORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-UNORDERED-NEXT: [[TMP7:%.*]] = insertelement shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float [[A2]], i32 0 ; CHECK-UNORDERED-NEXT: [[TMP8:%.*]] = insertelement shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer), float [[A1]], i32 0 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -606,8 +608,6 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; CHECK-UNORDERED-NEXT: [[TMP14]] = fadd [[TMP12]], [[VEC_PHI1]] ; CHECK-UNORDERED-NEXT: [[TMP15]] = fadd [[TMP13]], [[VEC_PHI]] -; CHECK-UNORDERED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; CHECK-UNORDERED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -661,6 +661,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-ORDERED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -676,8 +678,6 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-NEXT: [[TMP11:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 ; CHECK-ORDERED-NEXT: [[TMP12]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP11]]) ; CHECK-ORDERED-NEXT: [[TMP13]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP10]]) -; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-ORDERED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP6:![0-9]+]] @@ -730,6 +730,8 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-ORDERED-TF-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] @@ -755,8 +757,6 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP18]]) ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI1]], [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP12]]) ; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -867,6 +867,8 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -880,8 +882,6 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-UNORDERED-NEXT: [[TMP11]] = fadd [[VEC_PHI]], [[TMP10]] -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -929,6 +929,8 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP4]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -942,8 +944,6 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] ; CHECK-ORDERED-NEXT: [[TMP11]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP10]]) -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -991,6 +991,8 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP5]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[TMP7]] @@ -1012,8 +1014,6 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP16]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]]) ; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -1110,6 +1110,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1125,8 +1127,6 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-UNORDERED-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-UNORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] ; CHECK-UNORDERED-NEXT: [[TMP11]] = fadd [[VEC_PHI]], [[PREDPHI]] -; CHECK-UNORDERED-NEXT: 
[[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-UNORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1171,6 +1171,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1186,8 +1188,6 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD]] ; CHECK-ORDERED-NEXT: [[TMP11]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[PREDPHI]]) -; CHECK-ORDERED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-ORDERED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1232,6 +1232,8 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -1258,8 +1260,6 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = or [[TMP15]], [[TMP18]] ; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = select [[TMP19]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP21]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP20]]) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP23]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -1358,6 +1358,8 @@ define float @fadd_multiple(ptr noalias nocapture %a, 
ptr noalias nocapture %b, ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1371,8 +1373,6 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-UNORDERED-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 ; CHECK-UNORDERED-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP9]], align 4 ; CHECK-UNORDERED-NEXT: [[TMP10]] = fadd [[TMP7]], [[WIDE_LOAD1]] -; CHECK-UNORDERED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] ; CHECK-UNORDERED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1498,6 +1498,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1561,8 +1563,6 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP49]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) ; CHECK-UNORDERED-NEXT: [[TMP50]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) ; CHECK-UNORDERED-NEXT: [[TMP51]] = call @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) -; CHECK-UNORDERED-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 32 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP53]] ; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -1604,6 +1604,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1668,8 +1670,6 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP53:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP52]], [[TMP49]]) ; CHECK-ORDERED-NEXT: [[TMP54:%.*]] = call float 
@llvm.vector.reduce.fadd.nxv8f32(float [[TMP53]], [[TMP50]]) ; CHECK-ORDERED-NEXT: [[TMP55]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP54]], [[TMP51]]) -; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 32 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP57]] ; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1708,6 +1708,8 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -1813,8 +1815,6 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP84:%.*]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP82]], [[TMP83]]) ; CHECK-ORDERED-TF-NEXT: [[TMP85:%.*]] = select [[ACTIVE_LANE_MASK8]], [[TMP78]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP86]] = call float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP84]], [[TMP85]]) -; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 32 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP88]] ; CHECK-ORDERED-TF-NEXT: [[TMP89:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP90:%.*]] = mul i64 [[TMP89]], 8 @@ -1912,6 +1912,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-UNORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-UNORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-UNORDERED-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-UNORDERED-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 32 ; CHECK-UNORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UNORDERED: vector.body: ; CHECK-UNORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1975,8 +1977,6 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-UNORDERED-NEXT: [[TMP49]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD4]], [[WIDE_LOAD8]], [[VEC_PHI1]]) ; CHECK-UNORDERED-NEXT: [[TMP50]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD5]], [[WIDE_LOAD9]], [[VEC_PHI2]]) ; CHECK-UNORDERED-NEXT: [[TMP51]] = call nnan @llvm.fmuladd.nxv8f32( [[WIDE_LOAD6]], [[WIDE_LOAD10]], [[VEC_PHI3]]) -; CHECK-UNORDERED-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UNORDERED-NEXT: [[TMP53:%.*]] = mul i64 [[TMP52]], 32 ; CHECK-UNORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP53]] ; CHECK-UNORDERED-NEXT: [[TMP54:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-UNORDERED-NEXT: br i1 [[TMP54]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -2018,6 +2018,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: 
[[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-ORDERED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-ORDERED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 32 ; CHECK-ORDERED-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-ORDERED: vector.body: ; CHECK-ORDERED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -2082,8 +2084,6 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-NEXT: [[TMP53:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP52]], [[TMP49]]) ; CHECK-ORDERED-NEXT: [[TMP54:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP53]], [[TMP50]]) ; CHECK-ORDERED-NEXT: [[TMP55]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP54]], [[TMP51]]) -; CHECK-ORDERED-NEXT: [[TMP56:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-NEXT: [[TMP57:%.*]] = mul i64 [[TMP56]], 32 ; CHECK-ORDERED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP57]] ; CHECK-ORDERED-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-ORDERED-NEXT: br i1 [[TMP58]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -2122,6 +2122,8 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-ORDERED-TF-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 32 ; CHECK-ORDERED-TF-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -2227,8 +2229,6 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[TMP84:%.*]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP82]], [[TMP83]]) ; CHECK-ORDERED-TF-NEXT: [[TMP85:%.*]] = select nnan [[ACTIVE_LANE_MASK8]], [[TMP78]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-ORDERED-TF-NEXT: [[TMP86]] = call nnan float @llvm.vector.reduce.fadd.nxv8f32(float [[TMP84]], [[TMP85]]) -; CHECK-ORDERED-TF-NEXT: [[TMP87:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-ORDERED-TF-NEXT: [[TMP88:%.*]] = mul i64 [[TMP87]], 32 ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP88]] ; CHECK-ORDERED-TF-NEXT: [[TMP89:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-ORDERED-TF-NEXT: [[TMP90:%.*]] = mul i64 [[TMP89]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index b3de02799f513..7848d0836d546 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -13,21 +13,21 @@ define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, 
ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[BROADCAST_SPLAT]], i32 2, [[TMP4]], poison) -; CHECK-NEXT: [[TMP5:%.*]] = sext [[WIDE_MASKED_GATHER]] to -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP5]], ptr [[TMP6]], i32 4, [[TMP4]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[BROADCAST_SPLAT]], i32 2, [[TMP6]], poison) +; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_MASKED_GATHER]] to +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -92,20 +92,20 @@ define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocap ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 4.000000e-01, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0( [[BROADCAST_SPLAT]], i32 8, [[TMP4]], poison) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 8, [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 
[[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 4.000000e-01, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0( [[BROADCAST_SPLAT]], i32 8, [[TMP6]], poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -168,24 +168,24 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP6]], i32 4, [[TMP5]], poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP5]], poison) -; CHECK-NEXT: [[TMP7:%.*]] = add nsw [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP5]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP9:%.*]] = add nsw [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( 
[[TMP9]], ptr [[TMP10]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll index 04af618014a79..982915fe5cc53 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -19,6 +19,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -42,8 +44,6 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) { ; CHECK-NEXT: [[TMP17]] = and i64 [[TMP16]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD3]]) ; CHECK-NEXT: [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI2]] -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll index f84e7c0ea182b..f48933a76709d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -19,6 +19,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -40,8 +42,6 @@ define i64 @int_reduction_add(ptr %a, i64 %N) { ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 8 ; CHECK-NEXT: [[TMP16]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP17]] = add [[WIDE_LOAD3]], [[VEC_PHI2]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll index 23fd5fe5e908e..fd3d8d09fb39d 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -19,29 +19,29 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP17]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP16]], [[WIDE_LOAD2]]) -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP18]], [[WIDE_LOAD2]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -52,7 +52,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] @@ -73,7 +73,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -85,7 +85,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[TMP24]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[ADD_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 6982b3f0943ce..d5f41aa440e5b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -35,26 +35,26 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 32 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 
[[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 32 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]] +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP19]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -72,16 +72,16 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 8 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP24]] ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX5]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP27]], align 1 -; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 8 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP29]] +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), 
poison, zeroinitializer), ptr [[TMP29]], align 1 +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]] ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -113,26 +113,26 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 -; CHECK-VF8-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 32 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-VF8-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 16 +; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF8: middle.block: @@ -213,26 +213,26 @@ define void 
@main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -282,26 +282,26 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = 
add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 -; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] -; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2 -; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]] -; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP15]], align 1 -; CHECK-VF8-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-VF8-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-VF8-NEXT: store shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer), ptr [[TMP17]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-VF8: middle.block: @@ -372,26 +372,26 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 32 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: 
[[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP13]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 32 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 16 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP15]] +; CHECK-NEXT: store zeroinitializer, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -412,16 +412,16 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 10000, [[TMP21]] ; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 10000, [[N_MOD_VF3]] ; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC4]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 8 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX10]], 0 -; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[NEXT_GEP11]], i32 0 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP23]], align 1 -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 8 -; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP25]] +; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX10]], 0 +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[NEXT_GEP11]], i32 0 +; CHECK-NEXT: store zeroinitializer, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP23]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: @@ -455,26 +455,26 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = 
urem i64 10000, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 ; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 -; CHECK-VF8-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP9]], align 1 -; CHECK-VF8-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 16 -; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP11]] -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP12]], align 1 -; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 32 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; CHECK-VF8-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-VF8-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-VF8-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] +; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP11]], align 1 +; CHECK-VF8-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-VF8-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 +; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP13]] +; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP14]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-VF8: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index 995950e46f533..061b59f5442cd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -31,29 +31,29 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: 
[[TMP8:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[TMP9]], i64 [[TMP12]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP13]], align 2 -; CHECK-NEXT: [[TMP14:%.*]] = fneg [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP15:%.*]] = fneg [[WIDE_LOAD3]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i32 0 -; CHECK-NEXT: store [[TMP14]], ptr [[TMP17]], align 2 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds half, ptr [[TMP16]], i64 [[TMP19]] -; CHECK-NEXT: store [[TMP15]], ptr [[TMP20]], align 2 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 2 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP14]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP15]], align 2 +; CHECK-NEXT: [[TMP16:%.*]] = fneg [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP17:%.*]] = fneg [[WIDE_LOAD3]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store [[TMP16]], ptr [[TMP19]], align 2 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP21]] +; CHECK-NEXT: store [[TMP17]], ptr [[TMP22]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index 3a867cf401b33..a9657971d48ae 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -13,18 +13,18 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP4]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -80,19 +80,19 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = sext [[WIDE_LOAD1]] to -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP5]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD1]] to +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP7]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 
true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: @@ -147,18 +147,18 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: @@ -217,19 +217,19 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP4]], poison) -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP3]], i32 4, 
[[TMP4]]) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP5]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: @@ -292,31 +292,31 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] -; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP7:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], [[TMP7]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP12]] -; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 3 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP8:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP9:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], [[TMP9]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP14]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP15]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 01657493e9d4a..0fe8fa3f4154a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -11,7 +11,10 @@ target triple = "aarch64-unknown-linux-gnu" define void @induction_i7(ptr %dst) #0 { ; CHECK-LABEL: @induction_i7( ; CHECK: vector.ph: -; CHECK: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i8() +; CHECK: %ind.end = trunc i64 %n.vec to i7 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i8() ; CHECK: [[TMP5:%.*]] = trunc [[TMP4]] to ; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i7 1, i64 0), poison, zeroinitializer) @@ -25,8 +28,6 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: [[EXT:%.+]] = zext [[TMP11]] to ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store [[EXT]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], ; @@ -57,6 +58,9 @@ for.end: ; preds = %for.body define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-LABEL: @induction_i3_zext( ; CHECK: vector.ph: +; CHECK: %ind.end = trunc i64 
%n.vec to i3 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i8() ; CHECK: [[TMP5:%.*]] = trunc [[TMP4]] to ; CHECK-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer @@ -70,8 +74,6 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; CHECK-NEXT: store [[TMP10]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 739010b5b631f..965c71c008aa1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -23,26 +23,26 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 2 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[VEC_IND]] to -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP10]], i32 4, [[TMP7]]) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = trunc [[VEC_IND]] to +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, [[TMP9]], poison) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP11]], i32 4, [[TMP9]]) +; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -56,14 +56,14 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 27b0c95873514..2b71372baccb7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -26,6 +26,8 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 @@ -46,8 +48,6 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -1 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP4]], [[TMP5]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -106,6 +106,8 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() 
+; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl [[TMP0]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -134,8 +136,6 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 -1 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP8]], [[TMP10]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 @@ -197,6 +197,8 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl [[TMP0]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() @@ -226,8 +228,6 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK-NEXT: [[TMP12:%.*]] = trunc [[TMP11]] to ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1024 x i16], ptr @CD_i16, i64 0, [[TMP7]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0( [[TMP12]], [[TMP13]], i32 2, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 @@ -277,6 +277,8 @@ define i32 @test_struct_load6(%struct.ST6* %S) #1 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 @@ -305,8 +307,6 @@ define i32 @test_struct_load6(%struct.ST6* %S) #1 { ; CHECK-NEXT: [[TMP12:%.*]] = add [[TMP11]], [[WIDE_MASKED_GATHER4]] ; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP12]], [[WIDE_MASKED_GATHER5]] ; CHECK-NEXT: [[TMP14]] = sub [[TMP10]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -381,6 +381,8 @@ define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: 
vector.ph: +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i32() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub shufflevector ( insertelement ( poison, i32 1023, i64 0), poison, zeroinitializer), [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() @@ -416,8 +418,6 @@ define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly ; CHECK-NEXT: [[REVERSE3:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[TMP11]]) ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[REVERSE2]], [[REVERSE3]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -475,6 +475,8 @@ define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalia ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 512, [[TMP1]] ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw nsw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -487,8 +489,6 @@ define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalia ; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP5]] ; CHECK-NEXT: store [[TMP4]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -561,6 +561,8 @@ define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noali ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -573,8 +575,6 @@ define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noali ; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[INDEX]], 9223372036854775804 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] ; CHECK-NEXT: store [[TMP12]], ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ 
-640,6 +640,8 @@ define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noal ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer), [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() @@ -659,8 +661,6 @@ define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noal ; CHECK-NEXT: [[TMP5:%.*]] = sub nsw [[WIDE_MASKED_GATHER]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP2]], [[TMP3]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i64.nxv4p0( [[TMP5]], [[TMP4]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -709,6 +709,8 @@ define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -728,8 +730,6 @@ define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -1 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[TMP4]], [[TMP7]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -802,6 +802,8 @@ define void @int_float_struct(%struct.IntFloat* nocapture readonly %p) #0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -815,8 +817,6 @@ define void @int_float_struct(%struct.IntFloat* nocapture readonly %p) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to ; CHECK-NEXT: [[TMP4]] = add [[TMP1]], [[VEC_PHI1]] ; CHECK-NEXT: [[TMP5]] = fadd fast [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; 
CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -892,6 +892,8 @@ define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 @@ -911,8 +913,6 @@ define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -979,6 +979,8 @@ define i32 @PR27626_1(%pair.i32 *%p, i64 %n) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 @@ -1000,8 +1002,6 @@ define i32 @PR27626_1(%pair.i32 *%p, i64 %n) #1 { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC1]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP15]] = add [[TMP14]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1074,6 +1074,8 @@ define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 @@ -1093,8 +1095,6 @@ define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 ; CHECK-NEXT: 
call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP13]], [[TMP12]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1164,6 +1164,8 @@ define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[TMP17]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 @@ -1186,8 +1188,6 @@ define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { , } @llvm.experimental.vector.deinterleave2.nxv8i32( [[WIDE_VEC1]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP16]] = add [[TMP15]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[TMP17]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1267,6 +1267,8 @@ define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP5]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]] ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() @@ -1291,8 +1293,6 @@ define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 -1 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[TMP13]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1363,6 +1363,8 @@ define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNEG]] ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP6]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl [[TMP7]], shufflevector ( insertelement ( poison, i64 1, 
i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP8]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) @@ -1388,8 +1390,6 @@ define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], [[TMP15]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1473,6 +1473,8 @@ define void @PR34743(i16* %a, i32* %b, i64 %n) #1 { ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP8]], -4 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNEG]] ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i32 [[TMP9]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1 @@ -1502,8 +1504,6 @@ define void @PR34743(i16* %a, i32* %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP25:%.*]] = mul nsw [[TMP24]], [[TMP23]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP25]], ptr [[TMP26]], align 4, !alias.scope !37, !noalias !34 -; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP28:%.*]] = shl nuw nsw i64 [[TMP27]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index ae3abba27be5f..96bb3d5968380 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -35,6 +35,8 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 4 @@ -64,8 +66,6 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv32i8( [[TMP14]], [[TMP15]]) ; 
SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.experimental.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP18]], i32 1, [[INTERLEAVED_MASK1]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP20]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -111,6 +111,8 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP1]]) @@ -146,8 +148,6 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.experimental.vector.interleave2.nxv32i8( [[TMP14]], [[TMP15]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.experimental.vector.interleave2.nxv32i1( [[TMP10]], [[TMP10]]) ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP18]], i32 1, [[INTERLEAVED_MASK1]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = shl i32 [[TMP19]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP20]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP2]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] @@ -227,6 +227,8 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 4 @@ -247,8 +249,6 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = zext nneg [[TMP11]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP12]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP13]], i32 1, [[TMP10]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 ; 
SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP15]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -286,6 +286,8 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; PREDICATED_TAIL_FOLDING: vector.ph: ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD]] to i32 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP1]]) @@ -312,8 +314,6 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP11]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP12]], i32 1, [[TMP13]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP15]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP2]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] @@ -389,6 +389,8 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP3]] ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() +; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 4 @@ -412,8 +414,6 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg [[TMP12]] to ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP14]], i32 1, [[TMP11]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 4 ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP16]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -457,6 +457,8 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: vector.ph: ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV3:%.*]] = zext i8 
[[GUARD2]] to i32 ; PREDICATED_TAIL_FOLDING-NEXT: [[CONV:%.*]] = zext i8 [[GUARD1]] to i32 +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = shl i32 [[TMP16]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP1]]) @@ -487,8 +489,6 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( shufflevector ( insertelement ( poison, i8 2, i64 0), poison, zeroinitializer), [[TMP14]], i32 1, [[TMP15]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = shl i32 [[TMP16]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP17]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP2]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index 6a562e8b92bb7..08da1dedae23e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -15,21 +15,21 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[SRC:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP9]] -; CHECK-NEXT: store i16 [[TMP10]], ptr [[DST:%.*]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[SRC:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP11]] +; CHECK-NEXT: store i16 
[[TMP12]], ptr [[DST:%.*]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -45,7 +45,7 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: store i16 [[LD]], ptr [[DST]], align 2 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.inc24: ; CHECK-NEXT: ret void ; @@ -78,20 +78,20 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[TMP7]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[TMP9]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index b378603bbc52f..d12a1036ed536 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -23,6 +23,8 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 
8 ; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -60,8 +62,6 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP31:%.*]] = mul i64 [[TMP30]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 [[TMP31]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 8 -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = mul i64 [[TMP33]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP13]] ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index 997aca89b825d..602ccb678c968 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -4,14 +4,18 @@ target triple = "aarch64-unknown-linux-gnu" define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { ; CHECK-LABEL: @trip7_i64( +; CHECK: = call i64 @llvm.vscale.i64() +; CHECK-NEXT: = mul i64 +; CHECK: = call i64 @llvm.vscale.i64() +; CHECK-NEXT: = mul i64 +; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2 ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK: [[ACTIVE_LANE_MASK:%.*]] = phi [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ] ; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) ; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) ; CHECK: call void @llvm.masked.store.nxv2i64.p0( {{%.*}}, ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 79bda0a4a5d80..006171f3b0682 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -33,34 +33,34 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP21]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP27]] -; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP30]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP27]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] +; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -131,34 +131,34 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 
[[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP21]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP26]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP27]] -; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP28]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP30]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP24]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP18]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP27]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]] +; CHECK-NEXT: store [[WIDE_LOAD3]], ptr [[TMP30]], align 4 +; 
CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index 166d771482441..1d5c60bd7374e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -47,6 +47,8 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP49]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -90,8 +92,6 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP47:%.*]] = mul i64 [[TMP46]], 2 ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP39]], i64 [[TMP47]] ; CHECK-NEXT: store [[TMP36]], ptr [[TMP48]], align 8 -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP49]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP50]] ; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 2ca5fbebed877..aa3600602e547 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -10,6 +10,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-LABEL: Checking a loop in 'simple_memset' ; VPLANS: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { +; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count ; VPLANS-EMPTY: ; VPLANS-NEXT: ph: @@ -29,7 +30,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-NEXT: vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1> ; VPLANS-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEP]]> ; VPLANS-NEXT: WIDEN store ir<%gep>, ir<%val>, vp<[[LANEMASK_PHI]]> -; VPLANS-NEXT: EMIT vp<[[INDV_UPDATE:%[0-9]+]]> = VF * UF + vp<[[INDV]]> +; VPLANS-NEXT: EMIT vp<[[INDV_UPDATE:%[0-9]+]]> = add (part 0) vp<[[INDV]]>, vp<[[VFxUF]]> ; VPLANS-NEXT: EMIT vp<[[INC:%[0-9]+]]> = VF * Part + vp<[[INDV]]> ; VPLANS-NEXT: EMIT vp<[[LANEMASK_LOOP]]> = active lane mask vp<[[INC]]>, vp<[[NEWTC]]> ; VPLANS-NEXT: EMIT vp<[[NOT:%[0-9]+]]> = not vp<[[LANEMASK_LOOP]]> @@ -51,6 +52,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: 
[[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -67,8 +70,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index 275a26bae1f2e..568046ca29f58 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -16,24 +16,24 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = shl nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_MASKED_LOAD1]], [[TMP8]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP11]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call 
@llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = shl nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_MASKED_LOAD1]], [[TMP10]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP13]], ptr [[TMP12]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[TMP14]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index c0e736399f293..df107847e3e32 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -15,6 +15,8 @@ define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %s ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -25,8 +27,6 @@ define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %s ; CHECK-NEXT: [[TMP1:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 42, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP1]], ptr [[TMP2]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 @@ -76,6 +76,8 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK: for.body.preheader: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]]) @@ -89,8 +91,6 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK-NEXT: [[TMP4:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( 
insertelement ( poison, i32 42, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP4]], ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index 7e0f18b9efae1..70e50992b438d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -20,6 +20,8 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -37,8 +39,6 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP14]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -77,6 +77,8 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-IN-LOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -95,8 +97,6 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP13]]) ; CHECK-IN-LOOP-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]] -; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; 
CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -152,6 +152,8 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -169,8 +171,6 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -208,6 +208,8 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-IN-LOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -225,8 +227,6 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-IN-LOOP-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) ; CHECK-IN-LOOP-NEXT: [[TMP14]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP13]]) -; CHECK-IN-LOOP-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -281,6 +281,8 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP22:%.*]] = mul 
i64 [[TMP21]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -306,8 +308,6 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP20]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP23:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -353,6 +353,8 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-IN-LOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-IN-LOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-IN-LOOP-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-IN-LOOP-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -376,8 +378,6 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = select [[TMP15]], [[WIDE_MASKED_LOAD1]], zeroinitializer ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP17]]) ; CHECK-IN-LOOP-NEXT: [[TMP19]] = xor i32 [[TMP18]], [[VEC_PHI]] -; CHECK-IN-LOOP-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-IN-LOOP-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index caeb37bce2f18..4f73d7c1a7735 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -18,6 +18,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -94,8 +96,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP59:%.*]] = mul i64 [[TMP58]], 12 ; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( 
[[BROADCAST_SPLAT]], ptr [[TMP60]], i32 4, [[ACTIVE_LANE_MASK9]]) -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP62:%.*]] = mul i64 [[TMP61]], 16 ; CHECK-NEXT: [[INDEX_NEXT10]] = add i64 [[INDEX6]], [[TMP62]] ; CHECK-NEXT: [[TMP63:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[TMP63]], 4 @@ -160,6 +160,8 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP83]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -262,8 +264,6 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP80]], 12 ; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP81]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP82]], i32 4, [[TMP72]]) -; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP83]], 16 ; CHECK-NEXT: [[INDEX_NEXT13]] = add i64 [[INDEX6]], [[TMP84]] ; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP85]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 292bdd5c8be85..579e8d4ebb3c3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -18,6 +18,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -34,8 +36,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -145,6 +145,8 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] 
= mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -162,8 +164,6 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -222,6 +222,8 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], [[TMP9]] @@ -246,8 +248,6 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP20]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP22]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]]) ; CHECK-NEXT: [[TMP23:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -303,6 +303,8 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -321,8 +323,6 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST:%.*]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_GATHER]], [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add 
i64 [[INDEX1]], [[TMP16]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -382,6 +382,8 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -399,8 +401,6 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP16:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -457,6 +457,8 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ -483,8 +485,6 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[TMP18:%.*]] = or [[TMP15]], [[TMP16]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP19]], i32 4, [[TMP18]]) -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP21]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP22:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -557,6 +557,8 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP6]] @@ 
-574,8 +576,6 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -629,6 +629,8 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -648,8 +650,6 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP15:%.*]] = fdiv [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD2]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP15]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4 ; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP17]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -709,6 +709,8 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -729,8 +731,6 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[WIDE_MASKED_LOAD2]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP16:%.*]] = udiv [[WIDE_MASKED_LOAD]], [[TMP15]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP16]], ptr [[TMP14]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 ; CHECK-NEXT: [[INDEX_NEXT3]] = add i64 [[INDEX1]], [[TMP18]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: 
[[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -786,6 +786,8 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -795,8 +797,6 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 136561dcacd14..8d5c5be832574 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -23,6 +23,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -56,8 +58,6 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i64 [[TMP26]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[TMP27]] ; CHECK-NEXT: store [[TMP18]], ptr [[TMP29]], align 8 -; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP31:%.*]] = shl i64 [[TMP30]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP31]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -125,6 +125,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP34:%.*]] = shl i64 [[TMP33]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -158,8 +160,6 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[TMP31:%.*]] 
= getelementptr inbounds i64, ptr [[TMP22]], i64 [[TMP29]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[TMP31]], i64 [[TMP30]] ; CHECK-NEXT: store [[TMP21]], ptr [[TMP32]], align 8 -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = shl i64 [[TMP33]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP34]] ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll index 31cf2c4e2db4c..c3ace983fd911 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll @@ -7,6 +7,8 @@ define void @test_big_little_params(ptr readonly %a, ptr readonly %b, ptr noalia ; CHECK-LABEL: define void @test_big_little_params ; CHECK-SAME: (ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -19,8 +21,6 @@ define void @test_big_little_params(ptr readonly %a, ptr readonly %b, ptr noalia ; CHECK-NEXT: [[TMP2:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]], [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP2]], ptr [[TMP3]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 @@ -52,6 +52,8 @@ define void @test_little_big_params(ptr readonly %a, ptr readonly %b, ptr noalia ; CHECK-LABEL: define void @test_little_big_params ; CHECK-SAME: (ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -64,8 +66,6 @@ define void @test_little_big_params(ptr readonly %a, ptr readonly %b, ptr noalia ; CHECK-NEXT: [[TMP2:%.*]] = call @bar_vector( [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]], [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP2]], ptr [[TMP3]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index 62c778aca51d0..c2c614ddfa5b8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -9,6 +9,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1 ; ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%N> = original trip-count ; CHECK-EMPTY: @@ -25,7 +26,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%ptr.iv.2> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%lv>, ir<1> ; CHECK-NEXT: WIDEN store ir<%ptr.iv.2>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -49,6 +50,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]] ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -75,8 +78,6 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP18]], align 1 ; CHECK-NEXT: [[TMP19:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: store [[TMP19]], ptr [[TMP18]], align 1 -; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -142,6 +143,8 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -162,8 +165,6 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 1 ; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: store [[TMP15]], ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], 
[[TMP17]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll index 9cd2781ae2359..c4386c3ce5d11 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -27,6 +27,8 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]] ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -61,8 +63,6 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 2 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP23]] ; CHECK-NEXT: store [[TMP20]], ptr [[TMP24]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]] ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -143,6 +143,8 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -162,8 +164,6 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP13]] ; CHECK-NEXT: store [[TMP11]], ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -234,6 +234,8 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3 ; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -250,8 
+252,6 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP10]], align 8 ; CHECK-NEXT: [[TMP11]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: store [[TMP8]], ptr [[NEXT_GEP]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -310,6 +310,8 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr % ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -322,8 +324,6 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr % ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne [[TMP3]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement [[TMP3]], i64 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0( zeroinitializer, ptr [[TMP5]], i32 2, [[TMP4]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index eaabc263913a7..ff63e35c6089d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -10,6 +10,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -25,7 +26,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: REPLICATE ir<%call> = call @foo(ir<%load>) ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -36,6 +37,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -51,7 +53,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask) ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; 
CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -67,6 +69,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -82,7 +85,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask) ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXST:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -93,6 +96,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -108,7 +112,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>, ir) (using library function: foo_vector_fixed4_mask) ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -123,6 +127,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -138,7 +143,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask) ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -149,6 +154,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -164,7 +170,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library
function: foo_vector_fixed4_nomask) ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index 814d3ba53050e..94f24fea3609c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -22,6 +22,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]] ; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NONE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -31,8 +33,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]] ; NONE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 ; NONE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 4 -; NONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; NONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP8]] ; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -69,6 +69,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; DATA-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DATA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -79,8 +81,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] ; DATA-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 ; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] ; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -119,6 +119,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 
[[UMAX]], 1 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector [[BROADCAST_SPLATINSERT4]], poison, zeroinitializer ; DATA_NO_LANEMASK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -134,8 +136,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] ; DATA_NO_LANEMASK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 ; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT5]], ptr [[TMP14]], i32 4, [[TMP12]]) -; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP16]] ; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] ; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -171,6 +171,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]] ; DATA_AND_CONTROL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] ; DATA_AND_CONTROL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; DATA_AND_CONTROL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -182,8 +184,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP9]] ; DATA_AND_CONTROL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 ; DATA_AND_CONTROL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP13]] ; DATA_AND_CONTROL-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT2]], i64 [[UMAX]]) ; DATA_AND_CONTROL-NEXT: [[TMP14:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) @@ -217,6 +217,8 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP4]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = 
call i64 @llvm.vscale.i64() +; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]] @@ -233,8 +235,6 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll index f49b3714062d7..dec3c286345ad 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll @@ -32,18 +32,18 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP9]], ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = zext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -115,18 +115,18 @@ define void @sext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 8 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = sext [[WIDE_LOAD]] to -; CHECK-NEXT: [[TMP9:%.*]] = add [[TMP8]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP9]], ptr [[TMP10]], align 2 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = sext [[WIDE_LOAD]] to +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP10]], trunc ( shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) to ) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP12]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll index 51465dbf13e2f..bda20ae18bc96 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll @@ -9,6 +9,8 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; CHECK-LABEL: define void @test_uniform ; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]]) @@ -22,8 +24,6 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6 ; CHECK-NEXT: [[TMP4:%.*]] = call @foo_uniform( [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP4]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: 
[[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 @@ -53,6 +53,8 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; CHECK-LABEL: define void @test_uniform_smaller_scalar ; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]]) @@ -66,8 +68,6 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3 ; CHECK-NEXT: [[TMP4:%.*]] = call @bar_uniform( [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP4]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index f82eb9c7822f2..31362a2b225eb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -7,7 +7,9 @@ target triple = "arm64-apple-ios" ; CHECK-LABEL: LV: Checking a loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count + ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: @@ -23,7 +25,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -34,6 +36,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: } ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: @@ -50,7 +53,7 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using vector intrinsic) ; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT 
vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll index 7d095206c6062..4a2f9d07ed91c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll @@ -16,6 +16,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; WIDE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; WIDE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; WIDE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; WIDE-NEXT: br label [[VECTOR_BODY:%.*]] ; WIDE: vector.body: ; WIDE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -25,8 +27,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #1 { ; WIDE-NEXT: [[TMP6:%.*]] = call @foo_vector( [[TMP5]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; WIDE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; WIDE-NEXT: store [[TMP6]], ptr [[TMP7]], align 4 -; WIDE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; WIDE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; WIDE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; WIDE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; WIDE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index 4b93ea30cf252..db0be4c628157 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -22,20 +22,20 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load 
, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -85,22 +85,22 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7]] = add [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP7]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP9]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index a6ab8d1a1f35d..1f1891ee45488 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -19,20 +19,20 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -123,20 +123,20 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -227,20 +227,20 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: 
[[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -331,20 +331,20 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = srem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = srem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -435,24 +435,24 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -569,24 +569,24 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -703,21 +703,21 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = udiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 
[[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = udiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -830,21 +830,21 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] 
= icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -957,22 +957,22 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = xor [[TMP7]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 1310ed3618b2c..57e1dc9051f4d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -21,23 +21,23 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 ; OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; OUTLOOP-NEXT: 
[[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; OUTLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; OUTLOOP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 ; OUTLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; OUTLOOP: vector.body: ; OUTLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; OUTLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; OUTLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]] -; OUTLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; OUTLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; OUTLOOP-NEXT: [[TMP8]] = add [[VEC_PHI]], [[TMP7]] -; OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; OUTLOOP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 -; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP10]] +; OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; OUTLOOP-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 +; OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] +; OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 +; OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 +; OUTLOOP-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; OUTLOOP-NEXT: [[TMP10]] = add [[VEC_PHI]], [[TMP9]] +; OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OUTLOOP: middle.block: -; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP8]]) +; OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP10]]) ; OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] ; OUTLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; OUTLOOP: scalar.ph: @@ -75,20 +75,20 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 ; INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] ; INLOOP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; INLOOP-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; INLOOP-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 8 ; INLOOP-NEXT: br label [[VECTOR_BODY:%.*]] ; INLOOP: vector.body: ; INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; INLOOP-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 -; INLOOP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]] -; INLOOP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0 -; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 2 -; INLOOP-NEXT: [[TMP7:%.*]] = sext [[WIDE_LOAD]] to -; INLOOP-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP7]]) -; INLOOP-NEXT: [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]] -; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() -; INLOOP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 8 -; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP11]] +; INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; INLOOP-NEXT: [[TMP6:%.*]] = add 
i32 [[INDEX]], 0 +; INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]] +; INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 +; INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 2 +; INLOOP-NEXT: [[TMP9:%.*]] = sext [[WIDE_LOAD]] to +; INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32( [[TMP9]]) +; INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] +; INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] ; INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INLOOP: middle.block: @@ -96,7 +96,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; INLOOP: scalar.ph: ; INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; INLOOP-NEXT: br label [[FOR_BODY:%.*]] ; INLOOP: for.body: ; INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -109,7 +109,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] ; INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; INLOOP: for.cond.cleanup.loopexit: -; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; INLOOP-NEXT: br label [[FOR_COND_CLEANUP]] ; INLOOP: for.cond.cleanup: ; INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 29ef7364b8212..a9cd91b5a8cb5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -248,6 +248,8 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -276,8 +278,6 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP17]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i64 3, i64 0), poison, zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP18]], [[TMP17]], i32 8, shufflevector ( insertelement ( poison, i1 
true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -353,6 +353,8 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -406,8 +408,6 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP32]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP33:%.*]] = add [[WIDE_MASKED_GATHER7]], shufflevector ( insertelement ( poison, i64 8, i64 0), poison, zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[TMP33]], [[TMP32]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP35]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index 072cef66c00f4..b0aefae41a1fc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -15,6 +15,7 @@ define void @load_store(ptr %p) { ; LMUL1-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL1: vector.body: ; LMUL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -24,7 +25,6 @@ define void @load_store(ptr %p) { ; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 ; LMUL1-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; LMUL1-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; LMUL1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; LMUL1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -57,6 +57,8 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; LMUL2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; LMUL2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; LMUL2-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL2: vector.body: ; LMUL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -66,8 +68,6 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; LMUL2-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; LMUL2-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; LMUL2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL2-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; LMUL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -100,6 +100,8 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; LMUL4-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL4-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; LMUL4-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; LMUL4-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL4: vector.body: ; LMUL4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -109,8 +111,6 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; LMUL4-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; LMUL4-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; LMUL4-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL4-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; LMUL4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL4-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -143,6 +143,8 @@ define void @load_store(ptr %p) { ; LMUL8-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 ; LMUL8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; LMUL8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; LMUL8-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 ; LMUL8-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL8: vector.body: ; LMUL8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -152,8 +154,6 @@ define void @load_store(ptr %p) { ; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; LMUL8-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; LMUL8-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; LMUL8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; LMUL8-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8 ; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; LMUL8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index ace267d72dea0..0c5394cb95a61 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -16,23 +16,23 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], 
[[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP5]], i64 5) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP7]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP8]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP11]], ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP7]], i64 5) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP9]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP13]], ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 34b06972dab06..1307d57f32bc1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -20,14 +20,16 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() -; VLENUNK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer -; 
VLENUNK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; VLENUNK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer +; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] +; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]] +; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -35,20 +37,18 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] -; VLENUNK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 -; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, [[TMP11]], poison) -; VLENUNK-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[WIDE_MASKED_LOAD]] -; VLENUNK-NEXT: [[TMP15:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP10]] -; VLENUNK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; VLENUNK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 -; VLENUNK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; VLENUNK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP13:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP12]] +; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 +; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP15]], i32 4, [[TMP13]], poison) +; VLENUNK-NEXT: [[TMP16:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: 
[[PREDPHI:%.*]] = select [[TMP16]], zeroinitializer, [[WIDE_MASKED_LOAD]] +; VLENUNK-NEXT: [[TMP17:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP12]] +; VLENUNK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +; VLENUNK-NEXT: store [[TMP17]], ptr [[TMP19]], align 4 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 72e3bca79a5a5..e50d7362365b8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -39,6 +39,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 +; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 ; RV32-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; RV32-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; RV32-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) @@ -62,8 +64,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] ; RV32-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 -; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 ; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -121,6 +121,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 +; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 ; RV64-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; RV64-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; RV64-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) @@ -144,8 +146,6 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] ; RV64-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 -; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 ; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; RV64-NEXT: [[VEC_IND_NEXT]] = add 
[[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index cad64f5c7e2be..b8cc57105bd1a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -51,10 +51,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count -; CHECK-NEXT: vp<%1> = original trip-count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK: ph: -; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -70,7 +71,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%add9> -; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -188,10 +189,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count -; CHECK-NEXT: vp<%1> = original trip-count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK: ph: -; CHECK-NEXT: EMIT vp<%1> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -207,7 +209,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%conv1> -; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index 90c64e33bcef6..77b0ae2f846a3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -19,20 +19,20 @@ define void @test(ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub 
i64 200, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 200 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 200 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -142,20 +142,20 @@ define void @trivial_due_max_vscale(ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 8192 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 8192 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; 
CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -208,20 +208,20 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 1024 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 32 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP6]], 1024 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP11]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index f7bc4bd35f377..c634c2cf7fc67 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -24,6 +24,8 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -35,8 +37,6 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] 
; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -69,6 +69,8 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] @@ -80,8 +82,6 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -135,6 +135,8 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -146,8 +148,6 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 -; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -180,6 +180,8 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] @@ -191,8 +193,6 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; VLEN128-NEXT: store 
[[TMP7]], ptr [[TMP6]], align 4 -; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -301,6 +301,8 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -312,8 +314,6 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] ; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -346,6 +346,8 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] @@ -357,8 +359,6 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 ; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] ; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -409,6 +409,8 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 
@llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -420,8 +422,6 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] ; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; VLENUNK-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -459,6 +459,8 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -470,8 +472,6 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] ; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; VLEN128-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -529,6 +529,8 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -538,8 +540,6 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; VLENUNK-NEXT: 
[[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -570,6 +570,8 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] @@ -579,8 +581,6 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -627,6 +627,8 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -636,8 +638,6 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 ; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -668,6 +668,8 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 ; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] @@ -677,8 +679,6 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; VLEN128-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 ; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 0a8c7cfda9efb..affc6fc22c1ba 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -19,21 +19,21 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -85,21 +85,21 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: 
[[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -149,26 +149,26 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP8]], i32 8, 
[[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP11]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP12]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -221,19 +221,19 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -279,20 +279,20 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1025) ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -373,21 +373,21 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] -; 
CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 1024) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index c7bb4ca4b6c6c..34a7987bb40ab 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -38,6 +38,8 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 @@ -52,8 +54,6 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -113,6 +113,8 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[X:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 @@ -127,8 +129,6 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -184,6 +184,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -194,8 +196,6 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -257,6 +257,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement poison, i32 [[A:%.*]], i64 0 ; SCALABLE-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector [[MINMAX_IDENT_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[B:%.*]], i64 0 @@ -271,8 +273,6 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -330,6 +330,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -340,8 +342,6 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -428,6 +428,8 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -444,8 +446,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; SCALABLE-NEXT: [[TMP13:%.*]] = select [[TMP12]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), [[VEC_PHI]] ; SCALABLE-NEXT: [[TMP14:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; SCALABLE-NEXT: [[PREDPHI]] = select [[TMP8]], [[TMP13]], [[VEC_PHI]] -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index 38ef0537e4318..331a8d2eb66e2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -14,6 +14,8 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: 
[[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -24,8 +26,6 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP8]], ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -75,6 +75,8 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -85,8 +87,6 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP8]], ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 65a2cb51b88ff..12fdf2149daf4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -17,6 +17,8 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[TMP5]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP7:%.*]] = add [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -35,8 +37,6 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP13]], i32 4, 
shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP14:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP14]], [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -90,6 +90,8 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 64, i64 0), poison, zeroinitializer) @@ -107,8 +109,6 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) ; CHECK-NEXT: [[TMP11:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP11]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -236,6 +236,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -245,8 +247,6 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -320,6 +320,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: 
[[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -329,8 +331,6 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) { ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -452,6 +452,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP8]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; NOSTRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -463,8 +465,6 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP9]] ; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0 ; NOSTRIDED-NEXT: store [[TMP12]], ptr [[TMP14]], align 4 -; NOSTRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -518,6 +518,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]] ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; STRIDED-NEXT: [[TMP10:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP11:%.*]] = add [[TMP10]], zeroinitializer ; STRIDED-NEXT: [[TMP12:%.*]] = mul [[TMP11]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -539,8 +541,6 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; STRIDED-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[P2]], [[TMP16]] ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !11, !noalias !8 
-; STRIDED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -601,6 +601,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]] ; NOSTRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; NOSTRIDED-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] +; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; NOSTRIDED: vector.body: ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -610,8 +612,6 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 4 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; NOSTRIDED-NEXT: store [[TMP8]], ptr [[TMP7]], align 4 -; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -729,6 +729,8 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]] ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]] ; STRIDED-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]] +; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() +; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4 ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] @@ -761,8 +763,6 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP19]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !15 ; STRIDED-NEXT: [[TMP28:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) ; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP28]], [[TMP27]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)), !alias.scope !18, !noalias !15 -; STRIDED-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; STRIDED-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], 4 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP30]] ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP15]] ; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP23]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 89c3219c4b6d0..a12dfbaec195b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -21,6 +21,8 @@ define void 
@uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -31,8 +33,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -102,6 +102,8 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -113,8 +115,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -196,6 +196,8 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -206,8 +208,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 
@llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -326,6 +326,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -349,8 +351,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 ; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP14]], align 8 -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -443,6 +443,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -470,8 +472,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or [[TMP13]], [[TMP15]] ; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP18]], i32 8, [[TMP17]]) -; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -581,6 +581,8 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -591,8 +593,6 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -662,6 +662,8 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -673,8 +675,6 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -756,6 +756,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -766,8 +768,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -837,6 +837,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture 
%b, i ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -848,8 +850,6 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -931,6 +931,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -952,8 +954,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] ; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP15]], align 8 -; SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] ; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1029,6 +1029,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -1052,8 +1054,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1162,6 +1162,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -1185,8 +1187,6 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] ; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8 -; SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1275,6 +1275,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -1303,8 +1305,6 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or [[TMP13]], [[TMP16]] ; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, [[TMP17]]) -; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1412,6 +1412,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] ; SCALABLE-NEXT: 
[[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1422,8 +1424,6 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -1493,6 +1493,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1504,8 +1506,6 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 ; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 8ff542a36732d..c424f8ea220a8 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -5,6 +5,7 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-LABEL: 'test_chained_first_order_recurrences_1' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -23,7 +24,7 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: EMIT vp<[[FOR2_SPLICE:%.+]]> = first-order splice ir<%for.2>, vp<[[FOR1_SPLICE]]> ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[FOR1_SPLICE]]>, vp<[[FOR2_SPLICE]]> 
; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -55,6 +56,7 @@ exit: define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-LABEL: 'test_chained_first_order_recurrences_3' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -76,7 +78,7 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: WIDEN ir<%add.1> = add vp<[[FOR1_SPLICE]]>, vp<[[FOR2_SPLICE]]> ; CHECK-NEXT: WIDEN ir<%add.2> = add ir<%add.1>, vp<[[FOR3_SPLICE]]> ; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add.2> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 3198f19a8f9a9..9b98c90473fa4 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -9,6 +9,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize { ; CHECK-LABEL: sink_replicate_region_1 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count @@ -65,7 +66,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -99,6 +100,7 @@ exit: define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-LABEL: sink_replicate_region_2 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count @@ -136,7 +138,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -168,6 +170,7 @@ exit: define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) 
optsize { ; CHECK-LABEL: sink_replicate_region_3_reduction ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count @@ -205,7 +208,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[PRED]]>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> ; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]>, ir<%and.red.next>, ir<%and.red> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -242,6 +245,7 @@ exit: define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr %ptr, ptr noalias %dst) optsize { ; CHECK-LABEL: sink_replicate_region_4_requires_split_at_end_of_block ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count @@ -302,7 +306,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): loop.3 ; CHECK-EMPTY: ; CHECK: loop.3: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -340,6 +344,7 @@ exit: define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias %dst.2, i32 %x, i8 %y) optsize { ; CHECK-LABEL: sink_replicate_region_after_replicate_region ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -384,7 +389,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): loop.3 ; CHECK-EMPTY: ; CHECK-NEXT: loop.3: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -418,6 +423,7 @@ exit: ; preds = %loop define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias %dst) { ; CHECK-LABEL: need_new_block_after_sinking_pr56146 ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<3> = original trip-count @@ -455,7 +461,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT 
vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll index 6dac3531277dc..81e5fba71e6dc 100644 --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -37,6 +37,7 @@ for.end: ; Check for crash exposed by D76992. ; CHECK-LABEL: 'test' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<14> = original trip-count @@ -70,7 +71,7 @@ for.end: ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successor ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 028e0b7b39cba..242c3c073c8ea 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -5,6 +5,7 @@ ; DBG-LABEL: 'test_scalarize_call' ; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { +; DBG-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count ; DBG-EMPTY: @@ -23,7 +24,7 @@ ; DBG-NEXT: CLONE ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>) ; DBG-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%dst>, vp<[[IV_STEPS]]> ; DBG-NEXT: CLONE store ir<%min>, ir<%arrayidx> -; DBG-NEXT: EMIT vp<[[INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; DBG-NEXT: EMIT vp<[[INC:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; DBG-NEXT: EMIT branch-on-count vp<[[INC]]>, vp<[[VEC_TC]]> ; DBG-NEXT: No successors ; DBG-NEXT: } @@ -67,7 +68,8 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-LABEL: 'test_scalarize_with_branch_cond' -; DBG: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; DBG: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF +; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; DBG-NEXT: Live-in ir<1000> = original trip-count ; DBG-EMPTY: ; DBG-NEXT: vector.ph: @@ -100,7 +102,7 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Successor(s): cond.false.1 ; DBG-EMPTY: ; DBG-NEXT: cond.false.1: -; DBG-NEXT: EMIT vp<[[CAN_IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; DBG-NEXT: EMIT vp<[[CAN_IV_INC:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; DBG-NEXT: EMIT branch-on-count vp<[[CAN_IV_INC]]>, vp<[[VEC_TC]]> ; DBG-NEXT: No successors ; DBG-NEXT: } @@ -173,6 +175,7 @@ exit: ; DBG-LABEL: 'first_order_recurrence_using_induction' ; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { +; DBG-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; DBG-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count ; DBG-EMPTY: @@ -191,7 +194,7 @@ exit: ; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> ; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]> ; 
DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst> -; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; DBG-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VTC]]> ; DBG-NEXT: No successors ; DBG-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll index c7d7ac3cb9ede..9023916d6e1cd 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll @@ -24,6 +24,8 @@ define void @foo() { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -55,8 +57,6 @@ define void @foo() { ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[VEC_PHI5]], [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[TMP16:%.*]] = add nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq [[TMP16]], shufflevector ( insertelement ( poison, i64 1024, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 6d059c13597be..3be31c011eaac 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -197,6 +197,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2: vector.ph ; CHECK-VF4UF2: call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: call i32 @llvm.vscale.i32() +; CHECK-VF4UF2: call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: %[[MUL1:.*]] = mul i32 %[[VSCALE1]], 4 ; CHECK-VF4UF2: %[[SUB1:.*]] = sub i32 %[[MUL1]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index 034400ca2b0fb..2d5db6fc28332 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -19,33 +19,33 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 1 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; 
CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 1 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 8 -; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] -; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]] -; CHECK-NEXT: store [[TMP12]], ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP11]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP14:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP13]], ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP16]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i64 [[TMP17]] +; CHECK-NEXT: store [[TMP14]], ptr [[TMP18]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -101,30 +101,30 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; 
CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP7]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP8]], align 8 -; CHECK-NEXT: [[TMP9:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] -; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[TMP9]], ptr [[TMP11]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP12]] -; CHECK-NEXT: store [[TMP10]], ptr [[TMP13]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP9]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP10]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD]], [[VEC_IND]] +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_LOAD2]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[TMP11]], ptr [[TMP13]], align 8 ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP14]] +; CHECK-NEXT: store [[TMP12]], ptr [[TMP15]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -189,21 +189,21 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[IND_END:%.*]] = shl i32 [[DOTCAST]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP5:%.*]] = shl [[TMP4]], 
shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 3 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; CHECK-NEXT: [[TMP7:%.*]] = shl [[TMP6]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 3 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -267,25 +267,25 @@ define void @add_unique_indf32(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float ; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[DOTCAST]], 2.000000e+00 ; CHECK-NEXT: [[IND_END:%.*]] = fadd float [[TMP4]], 0.000000e+00 -; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv4i32() -; CHECK-NEXT: [[TMP6:%.*]] = uitofp [[TMP5]] to -; CHECK-NEXT: [[TMP7:%.*]] = fmul [[TMP6]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[INDUCTION:%.*]] = fadd [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float -; CHECK-NEXT: [[TMP11:%.*]] = fmul float [[TMP10]], 2.000000e+00 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; CHECK-NEXT: [[TMP8:%.*]] = uitofp [[TMP7]] to +; CHECK-NEXT: [[TMP9:%.*]] = fmul [[TMP8]], shufflevector ( insertelement ( poison, float 2.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = fadd [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP11:%.*]] = shl i32 [[TMP10]], 2 +; CHECK-NEXT: [[TMP12:%.*]] = uitofp i32 [[TMP11]] to float +; 
CHECK-NEXT: [[TMP13:%.*]] = fmul float [[TMP12]], 2.000000e+00 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP13]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[VEC_IND]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll index 375d2e0ac8af5..b14d5a9f4bb57 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -20,18 +20,18 @@ define void @test(ptr %d) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -92,18 +92,18 @@ define void @testloopvariant(ptr %d) { ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 128, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 128, [[N_MOD_VF]] +; CHECK-NEXT: 
[[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), ptr [[TMP8]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4096, ptr [[ARR]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll index 989d25c0099c5..6804f47c913c1 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll @@ -12,6 +12,8 @@ ; CHECKUF1-DAG: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2 ; CHECKUF1-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX4]] ; CHECKUF1: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf +; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2 ; CHECKUF1: vector.body: ; CHECKUF1: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] @@ -20,8 +22,6 @@ ; CHECKUF1: %[[FADD:.*]] = fadd %wide.load, shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) ; CHECKUF1: %[[IDXA:.*]] = getelementptr inbounds double, ptr %a, i64 %index ; CHECKUF1: store %[[FADD]], ptr %[[IDXA]], align 8 -; CHECKUF1: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() -; CHECKUF1: %[[VSCALEX4:.*]] = shl i64 %[[VSCALE]], 2 ; CHECKUF1: %index.next = add nuw i64 %index, %[[VSCALEX4]] ; CHECKUF1: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec ; CHECKUF1: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0 @@ -41,6 +41,8 @@ ; CHECKUF2-DAG: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3 ; CHECKUF2-DAG: %n.mod.vf = urem i64 %wide.trip.count, %[[VSCALEX8]] ; CHECKUF2: %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf +; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() +; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3 ; CHECKUF2: vector.body: ; CHECKUF2: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] @@ -58,8 +60,6 @@ ; CHECKUF2: %[[VSCALE2:.*]] = shl i64 %[[VSCALE]], 2 ; CHECKUF2: %[[IDXA_NEXT:.*]] = getelementptr inbounds double, ptr %[[IDXA]], i64 %[[VSCALE2]] ; CHECKUF2: store %[[FADD_NEXT]], ptr 
%[[IDXA_NEXT]], align 8 -; CHECKUF2: %[[VSCALE:.*]] = call i64 @llvm.vscale.i64() -; CHECKUF2: %[[VSCALEX8:.*]] = shl i64 %[[VSCALE]], 3 ; CHECKUF2: %index.next = add nuw i64 %index, %[[VSCALEX8]] ; CHECKUF2: %[[CMP:.*]] = icmp eq i64 %index.next, %n.vec ; CHECKUF2: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index afe16c71f7f9c..e68e658f0e879 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -5,6 +5,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-LABEL: @reduction_add_trunc( +; CHECK: call i32 @llvm.vscale.i32() +; CHECK: call i32 @llvm.vscale.i32() +; CHECK: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16 ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ] @@ -21,8 +25,6 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP35:%.*]] = trunc [[TMP29]] to ; CHECK-NEXT: [[TMP34]] = zext [[TMP33]] to ; CHECK-NEXT: [[TMP36]] = zext [[TMP35]] to -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]] ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}} ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll index 8c92074abfd82..18d2323ed6f5b 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll @@ -13,6 +13,8 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64 ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -21,8 +23,6 @@ define void @trunc_minimal_bitwidth(ptr %bptr, ptr noalias %hptr, i32 %val, i64 ; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT]] to ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[HPTR:%.*]], i64 [[INDEX]] ; CHECK-NEXT: store [[TMP4]], ptr [[TMP5]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -73,6 +73,8 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len) ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[LEN]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[LEN]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[ARG1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = trunc [[BROADCAST_SPLAT]] to @@ -84,8 +86,6 @@ define void @trunc_minimal_bitwidths_shufflevector (ptr %p, i32 %arg1, i64 %len) ; CHECK-NEXT: [[TMP6:%.*]] = xor [[WIDE_LOAD]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[TMP7]], ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index 9a4f174f439af..25937f145dab9 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable { ; CHECK: digraph VPlan { -; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1\nLive-in vp\<[[VEC_TC:%.+]]\> = vector-trip-count\nLive-in ir\<%n\> = original trip-count\n"] +; CHECK-NEXT: graph [labelloc=t, fontsize=30; label="Vectorization Plan\nInitial VPlan for VF=\{4\},UF\>=1\nLive-in vp\<[[VFxUF:%.+]]\> = runtime VF * UF\nLive-in vp\<[[VEC_TC:%.+]]\> = vector-trip-count\nLive-in ir\<%n\> = original trip-count\n"] ; CHECK-NEXT: node [shape=rect, fontname=Courier, fontsize=30] ; CHECK-NEXT: edge [fontname=Courier, fontsize=30] ; CHECK-NEXT: compound=true @@ -33,7 +33,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: " WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>) (using vector intrinsic)\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx2\> = getelementptr inbounds ir\<%x\>, vp\<[[STEPS]]\>\l" + ; CHECK-NEXT: " WIDEN store ir\<%arrayidx2\>, ir\<%call\>\l" + -; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT]]\> = VF * UF + nuw vp\<[[CAN_IV]]\>\l" + +; CHECK-NEXT: " EMIT vp\<[[CAN_IV_NEXT]]\> = add (part 0) nuw vp\<[[CAN_IV]]\>, vp\<[[VFxUF]]\>\l" + ; CHECK-NEXT: " EMIT branch-on-count vp\<[[CAN_IV_NEXT]]\>, vp\<[[VEC_TC]]\>\l" + ; CHECK-NEXT: "No successors\l" ; CHECK-NEXT: ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 70ede8dc12b9c..a59d6f23107c7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -4,6 +4,7 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-LABEL: LV: Checking a loop in 'iv_no_binary_op_in_descriptor' ; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -17,7 +18,7 @@ define void 
@iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%dst>, vp<[[STEPS:%.+]]> ; CHECK-NEXT: WIDEN store ir<%gep>, ir<%iv> -; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_INC:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 3626b8c9dce78..68e16e2a52652 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -9,6 +9,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable { ; CHECK-LABEL: Checking a loop in 'print_call_and_memory' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -24,7 +25,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>) ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%call> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -56,6 +57,7 @@ for.end: ; preds = %for.body, %entry define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ptr %z) nounwind uwtable { ; CHECK-LABEL: Checking a loop in 'print_widen_gep_and_select' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -74,7 +76,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: WIDEN ir<%add> = fadd ir<%lv>, ir<%sel> ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -108,6 +110,7 @@ for.end: ; preds = %for.body, %entry define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-LABEL: Checking a loop in 'print_reduction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -122,7 +125,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd 
(ir<%lv>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -154,6 +157,7 @@ for.end: ; preds = %for.body, %entry define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr noalias %dst) { ; CHECK-LABEL: Checking a loop in 'print_reduction_with_invariant_store' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -168,7 +172,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -199,6 +203,7 @@ for.end: ; preds = %for.body, %entry define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -237,7 +242,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: BLEND ir<%d> = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp> ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%idx>, ir<%d> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -277,6 +282,7 @@ for.end: ; preds = %for.inc define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-LABEL: Checking a loop in 'print_interleave_groups' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<256> = original trip-count ; CHECK-EMPTY: @@ -301,7 +307,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: store ir<1> to index 1 ; CHECK-NEXT: store ir<2> to index 2 ; CHECK-NEXT: store ir<%AB.3> to index 3 -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -345,6 +351,7 @@ for.end: define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-LABEL: Checking a loop in 'print_fmuladd_strict' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: 
Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -362,7 +369,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: WIDEN ir<%l.b> = load ir<%arrayidx2> ; CHECK-NEXT: EMIT vp<[[FMUL:%.+]]> = fmul nnan ninf nsz ir<%l.a>, ir<%l.b> ; CHECK-NEXT: REDUCE ir<[[MULADD:%.+]]> = ir<%sum.07> + nnan ninf nsz reduce.fadd (vp<[[FMUL]]>) -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -396,6 +403,7 @@ for.end: define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 { ; CHECK-LABEL: Checking a loop in 'debug_loc_vpinstruction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<128> = original trip-count ; CHECK-EMPTY: @@ -436,7 +444,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = select vp<[[NOT1]]>, vp<[[NOT2]]>, ir ; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]>/vp<[[OR1]]> ir<%psd>/vp<[[SEL2]]> ; CHECK-NEXT: WIDEN store ir<%isd>, ir<%ysd.0> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT:} @@ -482,6 +490,7 @@ declare float @llvm.fmuladd.f32(float, float, float) define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-LABEL: Checking a loop in 'print_expand_scev' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -504,7 +513,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%v2>, ir<1> ; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE store ir<%v3>, ir<%gep> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -537,6 +546,7 @@ loop.exit: define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-LABEL: Checking a loop in 'print_exit_value' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: @@ -551,7 +561,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: CLONE ir<%gep> = getelementptr inbounds ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%iv>, ir<%off> ; CHECK-NEXT: WIDEN store ir<%gep>, ir<0> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -583,6 +593,7 @@ exit: define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr 
%z) { ; CHECK-LABEL: Checking a loop in 'print_fast_math_flags' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -600,7 +611,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: WIDEN ir<%div> = fdiv reassoc nsz contract ir<%mul>, ir<2.000000e+00> ; CHECK-NEXT: CLONE ir<%gep.x> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%div> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -633,6 +644,7 @@ exit: define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-LABEL: Checking a loop in 'print_exact_flags' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -649,7 +661,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: WIDEN ir<%div.2> = udiv ir<%lv>, ir<60> ; CHECK-NEXT: WIDEN ir<%add> = add nuw nsw ir<%div.1>, ir<%div.2> ; CHECK-NEXT: WIDEN store ir<%gep.x>, ir<%add> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -681,6 +693,7 @@ exit: define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-LABEL: Checking a loop in 'print_call_flags' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: @@ -719,7 +732,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value>/vp<[[NOT_COND]]> ir<%fadd>/ir<%ifcond> ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN store ir<%st.addr>, ir<%st.value> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index 0dac63d54557d..eb12126b664f5 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -8,6 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-LABEL: sink_with_sideeffects ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: ir<0> = original trip-count ; CHECK-EMPTY: @@ -37,7 +38,7 @@ 
define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: } ; CHECK: if.then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index afc46de9a5a16..37ec033e668a5 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -11,6 +11,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-LABEL: LV: Checking a loop in 'sink1' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -50,7 +51,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -79,6 +80,7 @@ exit: ; CHECK-LABEL: LV: Checking a loop in 'sink2' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -133,7 +135,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -162,6 +164,7 @@ exit: ; CHECK-LABEL: LV: Checking a loop in 'sink3' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -216,7 +219,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -247,6 +250,7 @@ exit: define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-LABEL: LV: Checking a loop in 'uniform_gep' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<11> = original trip-count @@ -285,7 +289,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Successor(s): loop.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.then.0: -; CHECK-NEXT: EMIT 
vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -317,6 +321,7 @@ exit: define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'pred_cfg1' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -378,7 +383,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): next.0.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.0.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -416,6 +421,7 @@ exit: define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'pred_cfg2' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -479,7 +485,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.1.1 ; CHECK-EMPTY: ; CHECK-NEXT: then.1.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -524,6 +530,7 @@ exit: define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'pred_cfg3' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -588,7 +595,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.1.2 ; CHECK-EMPTY: ; CHECK-NEXT: then.1.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -632,6 +639,7 @@ exit: define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-LABEL: LV: Checking a loop in 'merge_3_replicate_region' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -695,7 +703,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0.4 ; CHECK-EMPTY: ; CHECK-NEXT: then.0.4: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -736,6 
+744,7 @@ exit: define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-LABEL: LV: Checking a loop in 'update_2_uses_in_same_recipe_in_merged_block' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -775,7 +784,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -802,6 +811,7 @@ exit: define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-LABEL: LV: Checking a loop in 'recipe_in_merge_candidate_used_by_first_order_recurrence' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -859,7 +869,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -887,6 +897,7 @@ exit: define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) { ; CHECK-LABEL: LV: Checking a loop in 'update_multiple_users' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<999> = original trip-count ; CHECK-EMPTY: @@ -918,7 +929,7 @@ define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) { ; CHECK-NEXT: Successor(s): loop.then.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.then.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -951,6 +962,7 @@ exit: define void @sinking_requires_duplication(ptr %addr) { ; CHECK-LABEL: LV: Checking a loop in 'sinking_requires_duplication' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<201> = original trip-count ; CHECK-EMPTY: @@ -983,7 +995,7 @@ define void @sinking_requires_duplication(ptr %addr) { ; CHECK-NEXT: Successor(s): then.0 ; CHECK-EMPTY: ; CHECK-NEXT: then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -1019,6 +1031,7 @@ exit: define void 
@merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr noalias %dst) optsize { ; CHECK-LABEL: LV: Checking a loop in 'merge_with_dead_gep_between_regions' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<%n> = original trip-count @@ -1054,7 +1067,7 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -1085,6 +1098,7 @@ exit: define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = runtime VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: @@ -1121,7 +1135,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): if.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: if.then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = VF * UF + nuw vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add (part 0) nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: }
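Note on the test updates in this section: they all follow the same pattern. The runtime VF * UF step for scalable vectorization is now materialized once in the vector preheader (the vscale call plus mul/shl pair added right after the %n.vec computation) instead of being recomputed inside vector.body on every iteration, and the canonical IV increment reuses that single value. A minimal illustrative sketch of the loop shape the updated CHECK lines expect — hypothetical value names, assuming VF=4, UF=1 and an i64 canonical induction variable, not taken verbatim from any of the tests above:

  vector.ph:
    ; step = vscale * VF * UF, computed once outside the loop
    %vs   = call i64 @llvm.vscale.i64()
    %step = mul i64 %vs, 4
    br label %vector.body

  vector.body:
    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    ; ... vectorized loop body ...
    %index.next = add nuw i64 %index, %step    ; uses the hoisted step
    %cmp = icmp eq i64 %index.next, %n.vec
    br i1 %cmp, label %middle.block, label %vector.body

At the VPlan level the same change shows up in the dumps as the additional "Live-in ... = runtime VF * UF" entry and the canonical IV increment being printed as "add (part 0)" with that live-in as its second operand.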