diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 1e03209e888bf..c4ba8e9857dc4 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -961,12 +961,10 @@ class TargetTransformInfo { TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, ArrayRef VL = {}) const; - /// Estimate the overhead of scalarizing an instructions unique - /// non-constant operands. The (potentially vector) types to use for each of - /// argument are passes via Tys. + /// Estimate the overhead of scalarizing operands with the given types. The + /// (potentially vector) types to use for each argument are passed via Tys. LLVM_ABI InstructionCost getOperandsScalarizationOverhead( - ArrayRef Args, ArrayRef Tys, - TTI::TargetCostKind CostKind) const; + ArrayRef Tys, TTI::TargetCostKind CostKind) const; /// If target has efficient vector element load/store instructions, it can /// return true here so that insertion/extraction costs are not added to diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 252acf381a8e1..43813d2f3acb5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -459,8 +459,7 @@ class TargetTransformInfoImplBase { } virtual InstructionCost - getOperandsScalarizationOverhead(ArrayRef Args, - ArrayRef Tys, + getOperandsScalarizationOverhead(ArrayRef Tys, TTI::TargetCostKind CostKind) const { return 0; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 27320b510b950..0a10b51f97c63 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include 
"llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" @@ -347,6 +348,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } + /// Filter out constant and duplicated entries in \p Ops and return a vector + /// containing the types from \p Tys corresponding to the remaining operands. + static SmallVector + filterConstantAndDuplicatedOperands(ArrayRef Ops, + ArrayRef Tys) { + SmallPtrSet UniqueOperands; + SmallVector FilteredTys; + for (const auto &[Op, Ty] : zip_equal(Ops, Tys)) { + if (isa(Op) || !UniqueOperands.insert(Op).second) + continue; + FilteredTys.push_back(Ty); + } + return FilteredTys; + } + protected: explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) : BaseT(DL) {} @@ -935,29 +951,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { CostKind); } - /// Estimate the overhead of scalarizing an instructions unique - /// non-constant operands. The (potentially vector) types to use for each of + /// Estimate the overhead of scalarizing an instruction's + /// operands. The (potentially vector) types to use for each of /// argument are passes via Tys. InstructionCost getOperandsScalarizationOverhead( - ArrayRef Args, ArrayRef Tys, - TTI::TargetCostKind CostKind) const override { - assert(Args.size() == Tys.size() && "Expected matching Args and Tys"); - + ArrayRef Tys, TTI::TargetCostKind CostKind) const override { InstructionCost Cost = 0; - SmallPtrSet UniqueOperands; - for (int I = 0, E = Args.size(); I != E; I++) { + for (Type *Ty : Tys) { // Disregard things like metadata arguments. 
- const Value *A = Args[I]; - Type *Ty = Tys[I]; if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() && !Ty->isPtrOrPtrVectorTy()) continue; - if (!isa(A) && UniqueOperands.insert(A).second) { - if (auto *VecTy = dyn_cast(Ty)) - Cost += getScalarizationOverhead(VecTy, /*Insert*/ false, - /*Extract*/ true, CostKind); - } + if (auto *VecTy = dyn_cast(Ty)) + Cost += getScalarizationOverhead(VecTy, /*Insert*/ false, + /*Extract*/ true, CostKind); } return Cost; @@ -974,7 +982,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { InstructionCost Cost = getScalarizationOverhead( RetTy, /*Insert*/ true, /*Extract*/ false, CostKind); if (!Args.empty()) - Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind); + Cost += getOperandsScalarizationOverhead( + filterConstantAndDuplicatedOperands(Args, Tys), CostKind); else // When no information on arguments is provided, we add the cost // associated with one argument as a heuristic. @@ -2170,8 +2179,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /*Insert=*/true, /*Extract=*/false, CostKind); } } - ScalarizationCost += - getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind); + ScalarizationCost += getOperandsScalarizationOverhead( + filterConstantAndDuplicatedOperands(Args, ICA.getArgTypes()), + CostKind); } IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 323ab8b1ddad1..4ac8f03e6dbf5 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -637,9 +637,8 @@ InstructionCost TargetTransformInfo::getScalarizationOverhead( } InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead( - ArrayRef Args, ArrayRef Tys, - TTI::TargetCostKind CostKind) const { - return TTIImpl->getOperandsScalarizationOverhead(Args, Tys, CostKind); + ArrayRef Tys, TTI::TargetCostKind CostKind) const { + 
return TTIImpl->getOperandsScalarizationOverhead(Tys, CostKind); } bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5e7f6523cd86d..7fc87a0b49f70 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1697,8 +1697,16 @@ class LoopVectorizationCostModel { /// Returns a range containing only operands needing to be extracted. SmallVector filterExtractingOperands(Instruction::op_range Ops, ElementCount VF) const { - return SmallVector(make_filter_range( - Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); })); + + SmallPtrSet UniqueOperands; + SmallVector Res; + for (Value *Op : Ops) { + if (isa(Op) || !UniqueOperands.insert(Op).second || + !needsExtract(Op, VF)) + continue; + Res.push_back(Op); + } + return Res; } public: @@ -5610,8 +5618,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, SmallVector Tys; for (auto *V : filterExtractingOperands(Ops, VF)) Tys.push_back(maybeVectorizeType(V->getType(), VF)); - return Cost + TTI.getOperandsScalarizationOverhead( - filterExtractingOperands(Ops, VF), Tys, CostKind); + return Cost + TTI.getOperandsScalarizationOverhead(Tys, CostKind); } void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {