Skip to content

Commit 4e4ecae

Browse files
committed
[LV][VPlan] Change to implement VPlan based predication for VPlan-native path Context: Patch Series #2 for outer loop vectorization support in LV using VPlan. (RFC: http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). Patch series #2 checks that inner loops are still trivially lock-step among all vector elements. Non-loop branches are blindly assumed as divergent. Changes here implement VPlan based predication algorithm to compute predicates for blocks that need predication. Predicates are computed for the VPLoop region in reverse post order. A block's predicate is computed as OR of the masks of all incoming edges. The mask for an incoming edge is computed as AND of predecessor block's predicate and either predecessor's Condition bit or NOT(Condition bit) depending on whether the edge from predecessor block to the current block is true or false edge. Reviewers: fhahn, rengolin, hsaito, dcaballe Reviewed By: fhahn Patch by Satish Guggilla, thanks! Differential Revision: https://reviews.llvm.org/D53349 llvm-svn: 351990
1 parent 020ce3f commit 4e4ecae

File tree

9 files changed

+650
-6
lines changed

9 files changed

+650
-6
lines changed

llvm/lib/Transforms/Vectorize/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize
77
VPlan.cpp
88
VPlanHCFGBuilder.cpp
99
VPlanHCFGTransforms.cpp
10+
VPlanPredicator.cpp
1011
VPlanSLP.cpp
1112
VPlanVerifier.cpp
1213

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ using namespace llvm;
2222
#define LV_NAME "loop-vectorize"
2323
#define DEBUG_TYPE LV_NAME
2424

25+
extern cl::opt<bool> EnableVPlanPredication;
26+
2527
static cl::opt<bool>
2628
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
2729
cl::desc("Enable if-conversion during vectorization."));
@@ -487,7 +489,10 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
487489
// Check whether the BranchInst is a supported one. Only unconditional
488490
// branches, conditional branches with an outer loop invariant condition or
489491
// backedges are supported.
490-
if (Br && Br->isConditional() &&
492+
// FIXME: We skip these checks when VPlan predication is enabled as we
493+
// want to allow divergent branches. This whole check will be removed
494+
// once VPlan predication is on by default.
495+
if (!EnableVPlanPredication && Br && Br->isConditional() &&
491496
!TheLoop->isLoopInvariant(Br->getCondition()) &&
492497
!LI->isLoopHeader(Br->getSuccessor(0)) &&
493498
!LI->isLoopHeader(Br->getSuccessor(1))) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+22-5
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "VPRecipeBuilder.h"
5959
#include "VPlanHCFGBuilder.h"
6060
#include "VPlanHCFGTransforms.h"
61+
#include "VPlanPredicator.h"
6162
#include "llvm/ADT/APInt.h"
6263
#include "llvm/ADT/ArrayRef.h"
6364
#include "llvm/ADT/DenseMap.h"
@@ -255,6 +256,13 @@ cl::opt<bool> EnableVPlanNativePath(
255256
cl::desc("Enable VPlan-native vectorization path with "
256257
"support for outer loop vectorization."));
257258

259+
// FIXME: Remove this switch once we have divergence analysis. Currently we
260+
// assume divergent non-backedge branches when this switch is true.
261+
cl::opt<bool> EnableVPlanPredication(
262+
"enable-vplan-predication", cl::init(false), cl::Hidden,
263+
cl::desc("Enable VPlan-native vectorization path predicator with "
264+
"support for outer loop vectorization."));
265+
258266
// This flag enables the stress testing of the VPlan H-CFG construction in the
259267
// VPlan-native vectorization path. It must be used in conjunction with
260268
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -6896,13 +6904,22 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
68966904
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
68976905
HCFGBuilder.buildHierarchicalCFG();
68986906

6907+
for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
6908+
Plan->addVF(VF);
6909+
6910+
if (EnableVPlanPredication) {
6911+
VPlanPredicator VPP(*Plan);
6912+
VPP.predicate();
6913+
6914+
// Avoid running transformation to recipes until masked code generation in
6915+
// VPlan-native path is in place.
6916+
return Plan;
6917+
}
6918+
68996919
SmallPtrSet<Instruction *, 1> DeadInstructions;
69006920
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
69016921
Plan, Legal->getInductionVars(), DeadInstructions);
69026922

6903-
for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
6904-
Plan->addVF(VF);
6905-
69066923
return Plan;
69076924
}
69086925

@@ -7119,8 +7136,8 @@ static bool processLoopInVPlanNativePath(
71197136
VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
71207137

71217138
// If we are stress testing VPlan builds, do not attempt to generate vector
7122-
// code.
7123-
if (VPlanBuildStressTest)
7139+
// code. Masked vector code generation support will follow soon.
7140+
if (VPlanBuildStressTest || EnableVPlanPredication)
71247141
return false;
71257142

71267143
LVP.setBestPlan(VF.Width, 1);

llvm/lib/Transforms/Vectorize/VPlan.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,19 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
560560
bumpIndent(1);
561561
OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
562562
bumpIndent(1);
563+
564+
// Dump the block predicate.
565+
const VPValue *Pred = BasicBlock->getPredicate();
566+
if (Pred) {
567+
OS << " +\n" << Indent << " \"BlockPredicate: ";
568+
if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
569+
PredI->printAsOperand(OS);
570+
OS << " (" << DOT::EscapeString(PredI->getParent()->getName())
571+
<< ")\\l\"";
572+
} else
573+
Pred->printAsOperand(OS);
574+
}
575+
563576
for (const VPRecipeBase &Recipe : *BasicBlock)
564577
Recipe.print(OS, Indent);
565578

llvm/lib/Transforms/Vectorize/VPlan.h

+53
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ class VPBlockBase {
352352
/// Successor selector, null for zero or single successor blocks.
353353
VPValue *CondBit = nullptr;
354354

355+
/// Current block predicate - null if the block does not need a predicate.
356+
VPValue *Predicate = nullptr;
357+
355358
/// Add \p Successor as the last successor to this block.
356359
void appendSuccessor(VPBlockBase *Successor) {
357360
assert(Successor && "Cannot add nullptr successor!");
@@ -490,6 +493,12 @@ class VPBlockBase {
490493

491494
void setCondBit(VPValue *CV) { CondBit = CV; }
492495

496+
/// \return the predicate currently attached to this block (may be null).
VPValue *getPredicate() { return Predicate; }
497+
498+
/// \return the predicate currently attached to this block (may be null);
/// const overload.
const VPValue *getPredicate() const { return Predicate; }
499+
500+
/// Set \p Pred as the predicate of this block, replacing any previous one.
void setPredicate(VPValue *Pred) { Predicate = Pred; }
501+
493502
/// Set a given VPBlockBase \p Successor as the single successor of this
494503
/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
495504
/// This VPBlockBase must have no successors.
@@ -520,6 +529,15 @@ class VPBlockBase {
520529
appendPredecessor(Pred);
521530
}
522531

532+
/// Remove all the predecessors of this block.
533+
void clearPredecessors() { Predecessors.clear(); }
534+
535+
/// Remove all the successors of this block and reset its condition bit to null.
536+
void clearSuccessors() {
537+
Successors.clear();
538+
CondBit = nullptr;
539+
}
540+
523541
/// The method which generates the output IR that correspond to this
524542
/// VPBlockBase, thereby "executing" the VPlan.
525543
virtual void execute(struct VPTransformState *State) = 0;
@@ -1490,6 +1508,41 @@ class VPBlockUtils {
14901508
From->removeSuccessor(To);
14911509
To->removePredecessor(From);
14921510
}
1511+
1512+
/// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
1513+
static bool isBackEdge(const VPBlockBase *FromBlock,
1514+
const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
1515+
assert(FromBlock->getParent() == ToBlock->getParent() &&
1516+
FromBlock->getParent() && "Must be in same region");
1517+
const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
1518+
const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
1519+
if (!FromLoop || !ToLoop || FromLoop != ToLoop)
1520+
return false;
1521+
1522+
// A back-edge is a branch from the loop latch to its header.
1523+
return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
1524+
}
1525+
1526+
/// Returns true if \p Block is a loop latch
1527+
static bool blockIsLoopLatch(const VPBlockBase *Block,
1528+
const VPLoopInfo *VPLInfo) {
1529+
if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
1530+
return ParentVPL->isLoopLatch(Block);
1531+
1532+
return false;
1533+
}
1534+
1535+
/// Count and return the number of succesors of \p PredBlock excluding any
1536+
/// backedges.
1537+
static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
1538+
VPLoopInfo *VPLI) {
1539+
unsigned Count = 0;
1540+
for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
1541+
if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
1542+
Count++;
1543+
}
1544+
return Count;
1545+
}
14931546
};
14941547

14951548
class VPInterleavedAccessInfo {

0 commit comments

Comments
 (0)