[x86][CGP] improve sinking of splatted vector shift amount operand
Expands on the enablement of the shouldSinkOperands() TLI hook in:
D79718

The last codegen/IR test diff shows what I suspected could happen - we were
sinking all splat shift operands into a loop. But that's not what we want in
general; we only want to sink the *shift amount* operand if it is a splat.

Differential Revision: https://reviews.llvm.org/D79827
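For background, a minimal sketch (not part of this commit) of why a splatted shift amount is cheap on x86: when every lane shifts by the same count, the shift can use the SSE2 "shift by scalar" encodings, whereas a per-lane variable shift (vpsllvd and friends) only exists with AVX2. The helper name below is made up for illustration.

#include <emmintrin.h>

// Every 32-bit lane is shifted left by the same count, so this lowers to
// PSLLD (available since SSE2); no per-lane variable shift is required.
__m128i shift_all_lanes_left(__m128i V, int Amt) {
  return _mm_sll_epi32(V, _mm_cvtsi32_si128(Amt));
}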
rotateright committed May 14, 2020
1 parent c5ff403 commit 26e742f
Showing 4 changed files with 139 additions and 215 deletions.
68 changes: 1 addition & 67 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -392,8 +392,6 @@ class TypePromotionTransaction;
bool optimizeLoadExt(LoadInst *Load);
bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool sinkShuffleVectorToShift(ShuffleVectorInst *SVI);
bool convertSplatType(ShuffleVectorInst *SVI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
@@ -6417,66 +6415,10 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
  return true;
}

/// Some targets have expensive vector shifts if the lanes aren't all the same
/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
/// it's often worth sinking a shufflevector splat down to its use so that
/// codegen can spot all lanes are identical.
bool CodeGenPrepare::sinkShuffleVectorToShift(ShuffleVectorInst *SVI) {
  BasicBlock *DefBB = SVI->getParent();

  // Only do this xform if variable vector shifts are particularly expensive.
  if (!TLI->isVectorShiftByScalarCheap(SVI->getType()))
    return false;

  // We only expect better codegen by sinking a shuffle if we can recognise a
  // constant splat.
  if (getSplatIndex(SVI->getShuffleMask()) < 0)
    return false;

  // InsertedShuffles - Only insert a shuffle in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;

  bool MadeChange = false;
  for (User *U : SVI->users()) {
    Instruction *UI = cast<Instruction>(U);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = UI->getParent();
    if (UserBB == DefBB) continue;

    // For now only apply this when the splat is used by a shift instruction.
    if (!UI->isShift()) continue;

    // Everything checks out, sink the shuffle if the user's block doesn't
    // already have a copy.
    Instruction *&InsertedShuffle = InsertedShuffles[UserBB];

    if (!InsertedShuffle) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      assert(InsertPt != UserBB->end());
      InsertedShuffle =
          new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
                                SVI->getShuffleMask(), "", &*InsertPt);
      InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
    }

    UI->replaceUsesOfWith(SVI, InsertedShuffle);
    MadeChange = true;
  }

  // If we removed all uses, nuke the shuffle.
  if (SVI->use_empty()) {
    SVI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// Some targets only accept certain types for splat inputs. For example a VDUP
/// in MVE takes a GPR (integer) register, and the instruction that incorporate
/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
bool CodeGenPrepare::convertSplatType(ShuffleVectorInst *SVI) {
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
  if (!match(SVI,
             m_ShuffleVector(m_InsertElement(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
@@ -6516,14 +6458,6 @@ bool CodeGenPrepare::convertSplatType(ShuffleVectorInst *SVI) {
  return true;
}

bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
  if (sinkShuffleVectorToShift(SVI))
    return true;
  if (convertSplatType(SVI))
    return true;
  return false;
}

bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
  // If the operands of I can be folded into a target instruction together with
  // I, duplicate and sink them.
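For illustration only, a simplified sketch (not the actual tryToSinkFreeOperands code) of what sinking a flagged operand amounts to: the instruction defining a use reported by shouldSinkOperands() is cloned into the user's block and only that use is rewired, so SelectionDAG, which works one basic block at a time, sees the splat right next to the shift.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: duplicate the defining instruction of use U next to
// its user and point only this use at the clone.
static bool sinkFlaggedOperand(Use &U, Instruction *UserInst) {
  auto *Def = dyn_cast<Instruction>(U.get());
  if (!Def || Def->getParent() == UserInst->getParent())
    return false;                  // already in the user's block
  Instruction *Clone = Def->clone();
  Clone->insertBefore(UserInst);   // place the copy beside its user
  U.set(Clone);                    // other uses keep the original value
  return true;
}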
29 changes: 17 additions & 12 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30680,18 +30680,23 @@ bool X86TargetLowering::shouldSinkOperands(Instruction *I,
  // A uniform shift amount in a vector shift or funnel shift may be much
  // cheaper than a generic variable vector shift, so make that pattern visible
  // to SDAG by sinking the shuffle instruction next to the shift.
  // TODO: This should handle normal shift opcodes too.
  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    if (ID == Intrinsic::fshl || ID == Intrinsic::fshr) {
      // The shift amount operand for these intrinsics is operand 2.
      auto *Shuf = dyn_cast<ShuffleVectorInst>(II->getOperand(2));
      if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
          isVectorShiftByScalarCheap(I->getType())) {
        Ops.push_back(&I->getOperandUse(2));
        return true;
      }
    }
  int ShiftAmountOpNum = -1;
  if (I->isShift())
    ShiftAmountOpNum = 1;
  else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
    if (II->getIntrinsicID() == Intrinsic::fshl ||
        II->getIntrinsicID() == Intrinsic::fshr)
      ShiftAmountOpNum = 2;
  }

  if (ShiftAmountOpNum == -1)
    return false;

  auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
  if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
      isVectorShiftByScalarCheap(I->getType())) {
    Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
    return true;
  }

  return false;
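A rough usage sketch (ShlInst and TLI are assumed to be in scope; this is not code from the commit): for a plain shl whose amount is produced by a splat shufflevector in another block, the hook above now reports operand 1 so CodeGenPrepare can sink the splat.

SmallVector<Use *, 4> OpsToSink;
// shl <4 x i32> %x, %amt, where %amt is a splat shuffle defined elsewhere:
// the x86 hook pushes &ShlInst->getOperandUse(1) and returns true.
if (TLI->shouldSinkOperands(ShlInst, OpsToSink)) {
  // CodeGenPrepare then duplicates the splat next to ShlInst via
  // tryToSinkFreeOperands().
}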