Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8620,9 +8620,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,

// Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
// IV by VF * UF.
auto *CanonicalIVIncrement =
new VPInstruction(VPInstruction::CanonicalIVIncrement, {CanonicalIVPHI},
{HasNUW, false}, DL, "index.next");
auto *CanonicalIVIncrement = new VPInstruction(
VPInstruction::AddPart0, {CanonicalIVPHI, &Plan.getRuntimeVFxUF()},
{HasNUW, false}, DL, "index.next");
CanonicalIVPHI->addOperand(CanonicalIVIncrement);

VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,12 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(&VectorTripCount, VectorTripCountV, Part);

IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model runtime VF * UF computation completely in VPlan.
State.set(&RuntimeVFxUF,
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
0);

// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
Expand All @@ -752,7 +758,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
return isa<VPScalarIVStepsRecipe>(U) ||
isa<VPDerivedIVRecipe>(U) ||
cast<VPInstruction>(U)->getOpcode() ==
VPInstruction::CanonicalIVIncrement;
VPInstruction::AddPart0;
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
Expand Down Expand Up @@ -845,6 +851,13 @@ void VPlan::execute(VPTransformState *State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPlan::printLiveIns(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);

if (RuntimeVFxUF.getNumUsers() > 0) {
O << "\nLive-in ";
RuntimeVFxUF.printAsOperand(O, SlotTracker);
O << " = runtime VF * UF";
}

if (VectorTripCount.getNumUsers() > 0) {
O << "\nLive-in ";
VectorTripCount.printAsOperand(O, SlotTracker);
Expand Down Expand Up @@ -1237,6 +1250,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
}

void VPSlotTracker::assignSlots(const VPlan &Plan) {
if (Plan.RuntimeVFxUF.getNumUsers() > 0)
assignSlot(&Plan.RuntimeVFxUF);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);
Expand Down
13 changes: 9 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1029,9 +1029,8 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
SLPStore,
ActiveLaneMask,
CalculateTripCountMinusVF,
CanonicalIVIncrement,
// The next op is similar to the above, but instead increment the
// canonical IV separately for each unrolled part.
AddPart0,
// Increment the canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
BranchOnCount,
BranchOnCond
Expand Down Expand Up @@ -1139,7 +1138,7 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
return false;
case VPInstruction::ActiveLaneMask:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::AddPart0:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
return true;
Expand Down Expand Up @@ -2512,6 +2511,9 @@ class VPlan {
/// Represents the vector trip count.
VPValue VectorTripCount;

/// Represents the loop-invariant runtime VF * UF of the vector loop region.
VPValue RuntimeVFxUF;

/// Holds a mapping between Values and their corresponding VPValue inside
/// VPlan.
Value2VPValueTy Value2VPValue;
Expand Down Expand Up @@ -2591,6 +2593,9 @@ class VPlan {
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }

/// Returns runtime VF * UF for the vector loop region.
VPValue &getRuntimeVFxUF() { return RuntimeVFxUF; }

/// Mark the plan to indicate that using Value2VPValue is not safe any
/// longer, because it may be stale.
void disableValue2VPValue() { Value2VPValueEnabled = false; }
Expand Down
13 changes: 5 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case Instruction::ICmp:
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::AddPart0:
case VPInstruction::CanonicalIVIncrementForPart:
return false;
default:
Expand Down Expand Up @@ -335,13 +335,10 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
return Builder.CreateSelect(Cmp, Sub, Zero);
}
case VPInstruction::CanonicalIVIncrement: {
case VPInstruction::AddPart0: {
if (Part == 0) {
auto *Phi = State.get(getOperand(0), 0);
// The loop step is equal to the vectorization factor (num of SIMD
// elements) times the unroll factor (num of SIMD instructions).
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
Value *Step = State.get(getOperand(1), 0);
return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
hasNoSignedWrap());
}
Expand Down Expand Up @@ -468,8 +465,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstOrderRecurrenceSplice:
O << "first-order splice";
break;
case VPInstruction::CanonicalIVIncrement:
O << "VF * UF +";
case VPInstruction::AddPart0:
O << "add (part 0)";
break;
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ define void @f1(ptr %A) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; SVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; SVE-NEXT: br label [[VECTOR_BODY:%.*]]
; SVE: vector.body:
; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand All @@ -66,8 +68,6 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]]
; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
; SVE-NEXT: [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down
Loading