[x86][CGP] improve sinking of splatted vector shift amount operand
Expands on the enablement of the shouldSinkOperands() TLI hook in:
D79718

The last codegen/IR test diff shows what I suspected could happen - we were
sinking all splat shift operands into a loop. But that's not what we want in
general; we only want to sink the *shift amount* operand if it is a splat.

Differential Revision: https://reviews.llvm.org/D79827
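For background, a minimal sketch (not part of this commit) of why a splatted shift amount is cheap on x86: when every lane shifts by the same count, the shift can use the SSE2 "shift by scalar" encodings, whereas a per-lane variable shift (vpsllvd and friends) only exists with AVX2. The helper name below is made up for illustration.

#include <emmintrin.h>

// Every 32-bit lane is shifted left by the same count, so this lowers to
// PSLLD (available since SSE2); no per-lane variable shift is required.
__m128i shift_all_lanes_left(__m128i V, int Amt) {
  return _mm_sll_epi32(V, _mm_cvtsi32_si128(Amt));
}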
rotateright committed May 14, 2020
1 parent c5ff403 commit 26e742f
Showing 4 changed files with 139 additions and 215 deletions.
68 changes: 1 addition & 67 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -392,8 +392,6 @@ class TypePromotionTransaction;
bool optimizeLoadExt(LoadInst *Load);
bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool sinkShuffleVectorToShift(ShuffleVectorInst *SVI);
bool convertSplatType(ShuffleVectorInst *SVI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
@@ -6417,66 +6415,10 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
  return true;
}

/// Some targets have expensive vector shifts if the lanes aren't all the same
/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
/// it's often worth sinking a shufflevector splat down to its use so that
/// codegen can spot all lanes are identical.
bool CodeGenPrepare::sinkShuffleVectorToShift(ShuffleVectorInst *SVI) {
  BasicBlock *DefBB = SVI->getParent();

  // Only do this xform if variable vector shifts are particularly expensive.
  if (!TLI->isVectorShiftByScalarCheap(SVI->getType()))
    return false;

  // We only expect better codegen by sinking a shuffle if we can recognise a
  // constant splat.
  if (getSplatIndex(SVI->getShuffleMask()) < 0)
    return false;

  // InsertedShuffles - Only insert a shuffle in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;

  bool MadeChange = false;
  for (User *U : SVI->users()) {
    Instruction *UI = cast<Instruction>(U);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = UI->getParent();
    if (UserBB == DefBB) continue;

    // For now only apply this when the splat is used by a shift instruction.
    if (!UI->isShift()) continue;

    // Everything checks out, sink the shuffle if the user's block doesn't
    // already have a copy.
    Instruction *&InsertedShuffle = InsertedShuffles[UserBB];

    if (!InsertedShuffle) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      assert(InsertPt != UserBB->end());
      InsertedShuffle =
          new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
                                SVI->getShuffleMask(), "", &*InsertPt);
      InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
    }

    UI->replaceUsesOfWith(SVI, InsertedShuffle);
    MadeChange = true;
  }

  // If we removed all uses, nuke the shuffle.
  if (SVI->use_empty()) {
    SVI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// Some targets only accept certain types for splat inputs. For example a VDUP
/// in MVE takes a GPR (integer) register, and the instruction that incorporate
/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
bool CodeGenPrepare::convertSplatType(ShuffleVectorInst *SVI) {
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
  if (!match(SVI,
             m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
@@ -6516,14 +6458,6 @@ bool CodeGenPrepare::convertSplatType(ShuffleVectorInst *SVI) {
  return true;
}

bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
  if (sinkShuffleVectorToShift(SVI))
    return true;
  if (convertSplatType(SVI))
    return true;
  return false;
}

bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
  // If the operands of I can be folded into a target instruction together with
  // I, duplicate and sink them.
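For illustration only, a simplified sketch (not the actual tryToSinkFreeOperands code) of what sinking a flagged operand amounts to: the instruction defining a use reported by shouldSinkOperands() is cloned into the user's block and only that use is rewired, so SelectionDAG, which works one basic block at a time, sees the splat right next to the shift.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: duplicate the defining instruction of use U next to
// its user and point only this use at the clone.
static bool sinkFlaggedOperand(Use &U, Instruction *UserInst) {
  auto *Def = dyn_cast<Instruction>(U.get());
  if (!Def || Def->getParent() == UserInst->getParent())
    return false;                  // already in the user's block
  Instruction *Clone = Def->clone();
  Clone->insertBefore(UserInst);   // place the copy beside its user
  U.set(Clone);                    // other uses keep the original value
  return true;
}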
29 changes: 17 additions & 12 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30680,18 +30680,23 @@ bool X86TargetLowering::shouldSinkOperands(Instruction *I,
  // A uniform shift amount in a vector shift or funnel shift may be much
  // cheaper than a generic variable vector shift, so make that pattern visible
  // to SDAG by sinking the shuffle instruction next to the shift.
  // TODO: This should handle normal shift opcodes too.
  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    if (ID == Intrinsic::fshl || ID == Intrinsic::fshr) {
      // The shift amount operand for these intrinsics is operand 2.
      auto *Shuf = dyn_cast<ShuffleVectorInst>(II->getOperand(2));
      if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
          isVectorShiftByScalarCheap(I->getType())) {
        Ops.push_back(&I->getOperandUse(2));
        return true;
      }
    }
  int ShiftAmountOpNum = -1;
  if (I->isShift())
    ShiftAmountOpNum = 1;
  else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    if (II->getIntrinsicID() == Intrinsic::fshl ||
        II->getIntrinsicID() == Intrinsic::fshr)
      ShiftAmountOpNum = 2;
  }

  if (ShiftAmountOpNum == -1)
    return false;

  auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
  if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
      isVectorShiftByScalarCheap(I->getType())) {
    Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
    return true;
  }

  return false;
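A rough usage sketch (ShlInst and TLI are assumed to be in scope; this is not code from the commit): for a plain shl whose amount is produced by a splat shufflevector in another block, the hook above now reports operand 1 so CodeGenPrepare can sink the splat.

SmallVector<Use *, 4> OpsToSink;
// shl <4 x i32> %x, %amt, where %amt is a splat shuffle defined elsewhere:
// the x86 hook pushes &ShlInst->getOperandUse(1) and returns true.
if (TLI->shouldSinkOperands(ShlInst, OpsToSink)) {
  // CodeGenPrepare then duplicates the splat next to ShlInst via
  // tryToSinkFreeOperands().
}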