Skip to content

Commit

Permalink
[VPlan] Properly handle sinking of replicate regions.
Browse files Browse the repository at this point in the history
This patch updates the code that sinks recipes required for first-order
recurrences to properly handle replicate-regions. At the moment, the
code would just move the replicate recipe out of its replicate-region,
producing an invalid VPlan.

When sinking a recipe in a replicate-region, we have to sink the whole
region. To do that, we first need to split the block at the target
recipe and move the region in between.

This patch also adds a splitAt helper to VPBasicBlock to split a
VPBasicBlock at a given iterator.

Fixes PR50009.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D100751
  • Loading branch information
fhahn committed May 4, 2021
1 parent 1f7adf8 commit ccebf7a
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 1 deletion.
33 changes: 32 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9094,6 +9094,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
for (auto &Entry : SinkAfter) {
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);

// If the target is in a replication region, make sure to move Sink to the
// block after it, not into the replication region itself.
if (auto *Region =
Expand All @@ -9106,7 +9107,37 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
continue;
}
}
Sink->moveAfter(Target);

auto *SinkRegion =
dyn_cast_or_null<VPRegionBlock>(Sink->getParent()->getParent());
// Unless the sink source is in a replicate region, sink the recipe
// directly.
if (!SinkRegion || !SinkRegion->isReplicator()) {
Sink->moveAfter(Target);
continue;
}

// If the sink source is in a replicate region, we need to move the whole
// replicate region, which should only contain a single recipe in the main
// block.
assert(Sink->getParent()->size() == 1 &&
"parent must be a replicator with a single recipe");
auto *SplitBlock =
Target->getParent()->splitAt(std::next(Target->getIterator()));

auto *Pred = SinkRegion->getSinglePredecessor();
auto *Succ = SinkRegion->getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Pred, SinkRegion);
VPBlockUtils::disconnectBlocks(SinkRegion, Succ);
VPBlockUtils::connectBlocks(Pred, Succ);

auto *SplitPred = SplitBlock->getSinglePredecessor();

VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
if (VPBB == SplitPred)
VPBB = SplitBlock;
}

// Interleave memory: for each Interleave Group we marked earlier as relevant
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,32 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
}
}

VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
assert(SplitAt->getParent() == this &&
"can only split at a position in the same block");

SmallVector<VPBlockBase *, 2> Succs(getSuccessors().begin(),
getSuccessors().end());
// First, disconnect the current block from its successors.
for (VPBlockBase *Succ : Succs)
VPBlockUtils::disconnectBlocks(this, Succ);

// Create new empty block after the block to split.
auto *SplitBlock = new VPBasicBlock(getName() + ".split");
VPBlockUtils::insertBlockAfter(SplitBlock, this);

// Add successors for block to split to new block.
for (VPBlockBase *Succ : Succs)
VPBlockUtils::connectBlocks(SplitBlock, Succ);

// Finally, move the recipes starting at SplitAt to new block.
for (VPRecipeBase &ToMove :
make_early_inc_range(make_range(SplitAt, this->end())))
ToMove.moveBefore(*SplitBlock, SplitBlock->end());

return SplitBlock;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,11 @@ class VPBasicBlock : public VPBlockBase {

void dropAllReferences(VPValue *NewValue) override;

/// Split current block at \p SplitAt by inserting a new block between the
/// current block and its successors and moving all recipes starting at
/// SplitAt to the new block. Returns the new block.
VPBasicBlock *splitAt(iterator SplitAt);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
/// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Test cases for PR50009, which require sinking a replicate-region due to a
; first-order recurrence.

define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
; CHECK-LABEL: sink_replicate_region_1
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: loop:
; CHECK-NEXT: WIDEN-PHI %0 = phi 0, %conv
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
; CHECK-NEXT: Successor(s): loop.0

; CHECK: loop.0:
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
; CHECK-NEXT: Successor(s): pred.load

; CHECK: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)

; CHECK: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%lv> = load ir<%gep> (S->V)
; CHECK-NEXT: Successor(s): pred.load.continue

; CHECK: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%lv>
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: loop.1:
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6>
; CHECK-NEXT: Successor(s): pred.srem

; CHECK: <xVFxUF> pred.srem: {
; CHECK-NEXT: pred.srem.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)

; CHECK: pred.srem.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
; CHECK-NEXT: Successor(s): pred.srem.continue

; CHECK: pred.srem.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: loop.1.split:
; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%9>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %loop

loop:
%0 = phi i32 [ 0, %entry ], [ %conv, %loop ]
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%rem = srem i32 %0, %x
%gep = getelementptr i8, i8* %ptr, i32 %iv
%lv = load i8, i8* %gep
%conv = sext i8 %lv to i32
%add = add i32 %conv, %rem
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 20001
br i1 %ec, label %exit, label %loop

exit:
ret void
}

define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-LABEL: sink_replicate_region_2
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: loop:
; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
; CHECK-NEXT: Successor(s): loop.0

; CHECK: loop.0:
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: Successor(s): pred.srem

; CHECK: <xVFxUF> pred.srem: {
; CHECK-NEXT: pred.srem.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)

; CHECK: pred.srem.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
; CHECK-NEXT: Successor(s): pred.srem.continue

; CHECK: pred.srem.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: loop.0.split:
; CHECK-NEXT: REPLICATE ir<%add> = add vp<%6>, ir<%recur.next>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
; CHECK-NEXT: Successor(s): pred.store

; CHECK: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)

; CHECK: pred.store.if:
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
; CHECK-NEXT: Successor(s): pred.store.continue

; CHECK: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: loop.1:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %loop

loop:
%recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%rem = srem i32 %recur, %x
%recur.next = sext i8 %y to i32
%add = add i32 %rem, %recur.next
%gep = getelementptr i32, i32* %ptr, i32 %iv
store i32 %add, i32* %gep
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 20001
br i1 %ec, label %exit, label %loop

exit:
ret void
}

define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-LABEL: sink_replicate_region_3_reduction
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
; CHECK-NEXT: loop:
; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
; CHECK-NEXT: WIDEN-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0>
; CHECK-NEXT: Successor(s): loop.0

; CHECK: loop.0:
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: Successor(s): pred.srem

; CHECK: <xVFxUF> pred.srem: {
; CHECK-NEXT: pred.srem.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%4>
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
; CHECK-NEXT: CondBit: vp<%4> (loop)

; CHECK: pred.srem.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V)
; CHECK-NEXT: Successor(s): pred.srem.continue

; CHECK: pred.srem.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }

; CHECK: loop.0.split:
; CHECK-NEXT: WIDEN ir<%add> = add vp<%7>, ir<%recur.next>
; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
; CHECK-NEXT: EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
entry:
br label %loop

loop:
%recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ]
%rem = srem i32 %recur, %x
%recur.next = sext i8 %y to i32
%add = add i32 %rem, %recur.next
%and.red.next = and i32 %and.red, %add
%iv.next = add nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 20001
br i1 %ec, label %exit, label %loop

exit:
%res = phi i32 [ %and.red.next, %loop ]
ret i32 %res
}

0 comments on commit ccebf7a

Please sign in to comment.