@@ -1283,11 +1283,8 @@ class BoUpSLP {
12831283 /// Vectorize the tree but with the list of externally used values \p
12841284 /// ExternallyUsedValues. Values in this MapVector can be replaced but the
12851285 /// generated extractvalue instructions.
1286- /// \param ReplacedExternals containd list of replaced external values
1287- /// {scalar, replace} after emitting extractelement for external uses.
12881286 Value *
12891287 vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
1290- SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
12911288 Instruction *ReductionRoot = nullptr);
12921289
12931290 /// \returns the cost incurred by unwanted spills and fills, caused by
@@ -14222,14 +14219,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1422214219
1422314220Value *BoUpSLP::vectorizeTree() {
1422414221 ExtraValueToDebugLocsMap ExternallyUsedValues;
14225- SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
14226- return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
14222+ return vectorizeTree(ExternallyUsedValues);
1422714223}
1422814224
14229- Value *BoUpSLP::vectorizeTree(
14230- const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14231- SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
14232- Instruction *ReductionRoot) {
14225+ Value *
14226+ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
14227+ Instruction *ReductionRoot) {
1423314228 // All blocks must be scheduled before any instructions are inserted.
1423414229 for (auto &BSIter : BlocksSchedules) {
1423514230 scheduleBlock(BSIter.second.get());
@@ -14373,6 +14368,7 @@ Value *BoUpSLP::vectorizeTree(
1437314368 SmallDenseSet<Value *, 4> UsedInserts;
1437414369 DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
1437514370 SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
14371+ SmallDenseSet<ExtractElementInst *, 4> IgnoredExtracts;
1437614372 // Extract all of the elements with the external uses.
1437714373 for (const auto &ExternalUse : ExternalUses) {
1437814374 Value *Scalar = ExternalUse.Scalar;
@@ -14426,11 +14422,16 @@ Value *BoUpSLP::vectorizeTree(
1442614422 if (ReplaceInst) {
1442714423 // Leave the instruction as is, if it cheaper extracts and all
1442814424 // operands are scalar.
14429- auto *CloneInst = Inst->clone();
14430- CloneInst->insertBefore(Inst);
14431- if (Inst->hasName())
14432- CloneInst->takeName(Inst);
14433- Ex = CloneInst;
14425+ if (auto *EE = dyn_cast<ExtractElementInst>(Inst)) {
14426+ IgnoredExtracts.insert(EE);
14427+ Ex = EE;
14428+ } else {
14429+ auto *CloneInst = Inst->clone();
14430+ CloneInst->insertBefore(Inst);
14431+ if (Inst->hasName())
14432+ CloneInst->takeName(Inst);
14433+ Ex = CloneInst;
14434+ }
1443414435 } else if (auto *ES = dyn_cast<ExtractElementInst>(Scalar);
1443514436 ES && isa<Instruction>(Vec)) {
1443614437 Value *V = ES->getVectorOperand();
@@ -14530,8 +14531,12 @@ Value *BoUpSLP::vectorizeTree(
1453014531 }
1453114532 Value *NewInst = ExtractAndExtendIfNeeded(Vec);
1453214533 // Required to update internally referenced instructions.
14533- Scalar->replaceAllUsesWith(NewInst);
14534- ReplacedExternals.emplace_back(Scalar, NewInst);
14534+ if (Scalar != NewInst) {
14535+ assert((!isa<ExtractElementInst>(Scalar) ||
14536+ !IgnoredExtracts.contains(cast<ExtractElementInst>(Scalar))) &&
14537+ "Extractelements should not be replaced.");
14538+ Scalar->replaceAllUsesWith(NewInst);
14539+ }
1453514540 continue;
1453614541 }
1453714542
@@ -14757,6 +14762,9 @@ Value *BoUpSLP::vectorizeTree(
1475714762 if (Entry->getOpcode() == Instruction::GetElementPtr &&
1475814763 !isa<GetElementPtrInst>(Scalar))
1475914764 continue;
14765+ if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
14766+ EE && IgnoredExtracts.contains(EE))
14767+ continue;
1476014768#ifndef NDEBUG
1476114769 Type *Ty = Scalar->getType();
1476214770 if (!Ty->isVoidTy()) {
@@ -17660,7 +17668,6 @@ class HorizontalReduction {
1766017668 // because of the vectorization.
1766117669 DenseMap<Value *, WeakTrackingVH> TrackedVals(ReducedVals.size() *
1766217670 ReducedVals.front().size());
17663- SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
1766417671
1766517672 // The compare instruction of a min/max is the insertion point for new
1766617673 // instructions and may be replaced with a new compare instruction.
@@ -17956,6 +17963,8 @@ class HorizontalReduction {
1795617963 if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
1795717964 continue;
1795817965 Value *RdxVal = Candidates[Cnt];
17966+ if (auto It = TrackedVals.find(RdxVal); It != TrackedVals.end())
17967+ RdxVal = It->second;
1795917968 if (!Visited.insert(RdxVal).second)
1796017969 continue;
1796117970 // Check if the scalar was vectorized as part of the vectorization
@@ -18024,8 +18033,8 @@ class HorizontalReduction {
1802418033 InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
1802518034
1802618035 // Vectorize a tree.
18027- Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
18028- ReplacedExternals , InsertPt);
18036+ Value *VectorizedRoot =
18037+ V.vectorizeTree(LocalExternallyUsedValues , InsertPt);
1802918038
1803018039 Builder.SetInsertPoint(InsertPt);
1803118040
0 commit comments