2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8645,7 +8645,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
// Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
// IV by VF * UF.
auto *CanonicalIVIncrement =
new VPInstruction(VPInstruction::CanonicalIVIncrement, {CanonicalIVPHI},
new VPInstruction(Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()},
{HasNUW, false}, DL, "index.next");
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
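With the dedicated CanonicalIVIncrement opcode removed, the increment becomes an ordinary add of the canonical IV phi and the new VFxUF live-in. A minimal sketch of how a transform could recognize it after this change (hypothetical helper, not part of the patch):

static bool isCanonicalIVIncrement(const VPInstruction *VPI, VPlan &Plan) {
  // A plain add whose operands are the canonical IV phi and the plan's
  // loop-invariant VF * UF live-in.
  return VPI->getOpcode() == Instruction::Add &&
         VPI->getOperand(0) == Plan.getCanonicalIV() &&
         VPI->getOperand(1) == &Plan.getVFxUF();
}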

22 changes: 21 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -741,6 +741,12 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(&VectorTripCount, VectorTripCountV, Part);

IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
State.set(&VFxUF,
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
0);
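// Aside (illustrative, not part of the patch): createStepForVF folds VF * UF
// to a constant for fixed vectorization factors and emits vscale-based math
// for scalable ones; e.g. for VF = vscale x 2 and UF = 1 the preheader gets
//   %0 = call i64 @llvm.vscale.i64()
//   %1 = mul i64 %0, 2
// as seen in the updated AArch64 tests below.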

// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
@@ -752,7 +758,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
return isa<VPScalarIVStepsRecipe>(U) ||
isa<VPDerivedIVRecipe>(U) ||
cast<VPInstruction>(U)->getOpcode() ==
VPInstruction::CanonicalIVIncrement;
Instruction::Add;
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
@@ -845,6 +851,13 @@ void VPlan::execute(VPTransformState *State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPlan::printLiveIns(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);

if (VFxUF.getNumUsers() > 0) {
O << "\nLive-in ";
VFxUF.printAsOperand(O, SlotTracker);
O << " = VF * UF";
}
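// Sketch of the resulting plan dump header when VFxUF has users (slot numbers
// are illustrative):
//   Live-in vp<%0> = VF * UF
//   Live-in vp<%1> = vector-trip-count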

if (VectorTripCount.getNumUsers() > 0) {
O << "\nLive-in ";
VectorTripCount.printAsOperand(O, SlotTracker);
@@ -1237,6 +1250,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
}

void VPSlotTracker::assignSlots(const VPlan &Plan) {
if (Plan.VFxUF.getNumUsers() > 0)
assignSlot(&Plan.VFxUF);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);
@@ -1260,6 +1275,11 @@ bool vputils::onlyFirstLaneUsed(VPValue *Def) {
[Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
}

bool vputils::onlyFirstPartUsed(VPValue *Def) {
return all_of(Def->users(),
[Def](VPUser *U) { return U->onlyFirstPartUsed(Def); });
}
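// Usage sketch, mirroring the generateInstruction change in VPlanRecipes.cpp
// further down: per-part code generation can reuse the part-0 value instead of
// re-emitting the operation for every unrolled part:
//   if (Part != 0 && vputils::onlyFirstPartUsed(Def))
//     return State.get(Def, 0);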

VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
ScalarEvolution &SE) {
if (auto *Expanded = Plan.getSCEVExpansion(Expr))
36 changes: 32 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1058,9 +1058,7 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
SLPStore,
ActiveLaneMask,
CalculateTripCountMinusVF,
CanonicalIVIncrement,
// The next op is similar to the above, but instead increment the
// canonical IV separately for each unrolled part.
// Increment the canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
BranchOnCount,
BranchOnCond
@@ -1168,13 +1166,27 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
return false;
case VPInstruction::ActiveLaneMask:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
return true;
};
llvm_unreachable("switch should return");
}

/// Returns true if the recipe only uses the first part of operand \p Op.
bool onlyFirstPartUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
if (getOperand(0) != Op)
return false;
switch (getOpcode()) {
default:
return false;
case VPInstruction::BranchOnCount:
return true;
};
llvm_unreachable("switch should return");
}
};
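// Context for the BranchOnCount case above (paraphrased and simplified from
// its code generation; not part of this diff): the exit branch only compares
// the part-0 scalar value of the IV increment against the trip count, so later
// parts of that operand are never needed. Roughly:
//   Value *IV = State.get(getOperand(0), /*Part=*/0);
//   Value *TC = State.get(getOperand(1), /*Part=*/0);
//   Value *Cond = Builder.CreateICmpEQ(IV, TC);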

/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -2126,6 +2138,13 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
return true;
}

/// Returns true if the recipe only uses the first part of operand \p Op.
bool onlyFirstPartUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}

/// Check if the induction described by \p Kind, /p Start and \p Step is
/// canonical, i.e. has the same start, step (of 1), and type as the
/// canonical IV.
@@ -2545,6 +2564,9 @@ class VPlan {
/// Represents the vector trip count.
VPValue VectorTripCount;

/// Represents the loop-invariant VF * UF of the vector loop region.
VPValue VFxUF;

/// Holds a mapping between Values and their corresponding VPValue inside
/// VPlan.
Value2VPValueTy Value2VPValue;
@@ -2624,6 +2646,9 @@ class VPlan {
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }

/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }

/// Mark the plan to indicate that using Value2VPValue is not safe any
/// longer, because it may be stale.
void disableValue2VPValue() { Value2VPValueEnabled = false; }
@@ -3054,6 +3079,9 @@ namespace vputils {
/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(VPValue *Def);

/// Returns true if only the first part of \p Def is used.
bool onlyFirstPartUsed(VPValue *Def);

/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
32 changes: 8 additions & 24 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -120,7 +120,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case Instruction::ICmp:
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementForPart:
return false;
default:
@@ -272,9 +271,16 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Builder.SetCurrentDebugLocation(getDebugLoc());

if (Instruction::isBinaryOp(getOpcode())) {
if (Part != 0 && vputils::onlyFirstPartUsed(this))
return State.get(this, 0);

Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
auto *Res =
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
if (auto *I = dyn_cast<Instruction>(Res))
setFlags(I);
return Res;
}
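// Net effect for the canonical IV increment (sketch; value names illustrative):
// a single scalar add per vector iteration, e.g.
//   %index.next = add nuw i64 %index, %vf.x.uf
// with the recorded nuw/nsw flags applied by setFlags above, while parts > 0
// simply reuse the part-0 value through the early return.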

switch (getOpcode()) {
@@ -335,25 +341,6 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
return Builder.CreateSelect(Cmp, Sub, Zero);
}
case VPInstruction::CanonicalIVIncrement: {
if (Part == 0) {
auto *Phi = State.get(getOperand(0), 0);
// The loop step is equal to the vectorization factor (num of SIMD
// elements) times the unroll factor (num of SIMD instructions).
Value *Step;
{
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
IRBuilder<> PHBuilder(VectorPH->getTerminator());
// Step is loop-invariant, calls to vscale will be placed in the
// preheader.
Step = createStepForVF(PHBuilder, Phi->getType(), State.VF, State.UF);
}
return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
hasNoSignedWrap());
}
return State.get(this, 0);
}

case VPInstruction::CanonicalIVIncrementForPart: {
auto *IV = State.get(getOperand(0), VPIteration(0, 0));
if (Part == 0)
@@ -474,9 +461,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstOrderRecurrenceSplice:
O << "first-order splice";
break;
case VPInstruction::CanonicalIVIncrement:
O << "VF * UF +";
break;
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -303,6 +303,14 @@ class VPUser {
"Op must be an operand of the recipe");
return false;
}

/// Returns true if the VPUser only uses the first part of operand \p Op.
/// Conservatively returns false.
virtual bool onlyFirstPartUsed(const VPValue *Op) const {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return false;
}
};

/// This class augments a recipe with a set of VPValues defined by the recipe.
22 changes: 11 additions & 11 deletions llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -60,9 +60,9 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -161,9 +161,9 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -283,9 +283,9 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -516,9 +516,9 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]]
; TFALWAYS: vector.body:
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -546,9 +546,9 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFFALLBACK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
; TFFALLBACK: vector.body:
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -599,10 +599,10 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; TFNONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
; TFNONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -651,11 +651,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFALWAYS-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFALWAYS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
; TFALWAYS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFALWAYS-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; TFALWAYS-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]]
; TFALWAYS: vector.body:
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -688,11 +688,11 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFFALLBACK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]]
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
; TFFALLBACK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[M:%.*]], i64 0
; TFFALLBACK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x double> [[BROADCAST_SPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
; TFFALLBACK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
; TFFALLBACK: vector.body:
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -18,6 +18,8 @@ define void @foo() {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
@@ -27,8 +29,6 @@ define void @foo() {
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[OUTER_LOOP_LATCH4:%.*]] ]
@@ -24,6 +24,8 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
@@ -35,8 +37,6 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]]
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]