@@ -7056,16 +7056,19 @@ bool BoUpSLP::areAllUsersVectorized(
70567056
70577057static std::pair<InstructionCost, InstructionCost>
70587058getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
7059- TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
7060- ArrayRef<Type *> ArgTys) {
7059+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
70617060 Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
70627061
70637062 // Calculate the cost of the scalar and vector calls.
7063+ SmallVector<Type *, 4> VecTys;
7064+ for (Use &Arg : CI->args())
7065+ VecTys.push_back(
7066+ FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
70647067 FastMathFlags FMF;
70657068 if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
70667069 FMF = FPCI->getFastMathFlags();
70677070 SmallVector<const Value *> Arguments(CI->args());
7068- IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys , FMF,
7071+ IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys , FMF,
70697072 dyn_cast<IntrinsicInst>(CI));
70707073 auto IntrinsicCost =
70717074 TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
@@ -7078,8 +7081,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
70787081 if (!CI->isNoBuiltin() && VecFunc) {
70797082 // Calculate the cost of the vector library call.
70807083 // If the corresponding vector call is cheaper, return its cost.
7081- LibCost =
7082- TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
7084+ LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
7085+ TTI::TCK_RecipThroughput);
70837086 }
70847087 return {IntrinsicCost, LibCost};
70857088}
@@ -8505,30 +8508,6 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
85058508 return TTI::CastContextHint::None;
85068509}
85078510
8508- /// Builds the arguments types vector for the given call instruction with the
8509- /// given \p ID for the specified vector factor.
8510- static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
8511- const Intrinsic::ID ID,
8512- const unsigned VF,
8513- unsigned MinBW) {
8514- SmallVector<Type *> ArgTys;
8515- for (auto [Idx, Arg] : enumerate(CI->args())) {
8516- if (ID != Intrinsic::not_intrinsic) {
8517- if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
8518- ArgTys.push_back(Arg->getType());
8519- continue;
8520- }
8521- if (MinBW > 0) {
8522- ArgTys.push_back(FixedVectorType::get(
8523- IntegerType::get(CI->getContext(), MinBW), VF));
8524- continue;
8525- }
8526- }
8527- ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
8528- }
8529- return ArgTys;
8530- }
8531-
85328511InstructionCost
85338512BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
85348513 SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9095,11 +9074,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
90959074 };
90969075 auto GetVectorCost = [=](InstructionCost CommonCost) {
90979076 auto *CI = cast<CallInst>(VL0);
9098- Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
9099- SmallVector<Type *> ArgTys =
9100- buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
9101- It != MinBWs.end() ? It->second.first : 0);
9102- auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
9077+ auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
91039078 return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
91049079 };
91059080 return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -12571,10 +12546,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1257112546
1257212547 Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
1257312548
12574- SmallVector<Type *> ArgTys =
12575- buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
12576- It != MinBWs.end() ? It->second.first : 0);
12577- auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
12549+ auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
1257812550 bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
1257912551 VecCallCosts.first <= VecCallCosts.second;
1258012552
@@ -12583,20 +12555,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1258312555 SmallVector<Type *, 2> TysForDecl;
1258412556 // Add return type if intrinsic is overloaded on it.
1258512557 if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
12586- TysForDecl.push_back(VecTy);
12558+ TysForDecl.push_back(
12559+ FixedVectorType::get(CI->getType(), E->Scalars.size()));
1258712560 auto *CEI = cast<CallInst>(VL0);
1258812561 for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
1258912562 ValueList OpVL;
1259012563 // Some intrinsics have scalar arguments. This argument should not be
1259112564 // vectorized.
1259212565 if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
1259312566 ScalarArg = CEI->getArgOperand(I);
12594- // if decided to reduce bitwidth of abs intrinsic, it second argument
12595- // must be set false (do not return poison, if value issigned min).
12596- if (ID == Intrinsic::abs && It != MinBWs.end() &&
12597- It->second.first < DL->getTypeSizeInBits(CEI->getType()))
12598- ScalarArg = Builder.getFalse();
12599- OpVecs.push_back(ScalarArg);
12567+ OpVecs.push_back(CEI->getArgOperand(I));
1260012568 if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
1260112569 TysForDecl.push_back(ScalarArg->getType());
1260212570 continue;
@@ -12609,13 +12577,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1260912577 }
1261012578 ScalarArg = CEI->getArgOperand(I);
1261112579 if (cast<VectorType>(OpVec->getType())->getElementType() !=
12612- ScalarArg->getType() &&
12613- It == MinBWs.end()) {
12580+ ScalarArg->getType()) {
1261412581 auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
1261512582 VecTy->getNumElements());
1261612583 OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
12617- } else if (It != MinBWs.end()) {
12618- OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I));
1261912584 }
1262012585 LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
1262112586 OpVecs.push_back(OpVec);
@@ -14359,45 +14324,6 @@ bool BoUpSLP::collectValuesToDemote(
1435914324 return TryProcessInstruction(I, *ITE, BitWidth, Ops);
1436014325 }
1436114326
14362- case Instruction::Call: {
14363- auto *IC = dyn_cast<IntrinsicInst>(I);
14364- if (!IC)
14365- break;
14366- Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI);
14367- if (ID != Intrinsic::abs && ID != Intrinsic::smin &&
14368- ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax)
14369- break;
14370- SmallVector<Value *> Operands(1, I->getOperand(0));
14371- End = 1;
14372- if (ID != Intrinsic::abs) {
14373- Operands.push_back(I->getOperand(1));
14374- End = 2;
14375- }
14376- InstructionCost BestCost =
14377- std::numeric_limits<InstructionCost::CostType>::max();
14378- unsigned BestBitWidth = BitWidth;
14379- unsigned VF = ITE->Scalars.size();
14380- // Choose the best bitwidth based on cost estimations.
14381- auto Checker = [&](unsigned BitWidth, unsigned) {
14382- unsigned MinBW = PowerOf2Ceil(BitWidth);
14383- SmallVector<Type *> ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW);
14384- auto VecCallCosts = getVectorCallCosts(
14385- IC,
14386- FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF),
14387- TTI, TLI, ArgTys);
14388- InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second);
14389- if (Cost < BestCost) {
14390- BestCost = Cost;
14391- BestBitWidth = BitWidth;
14392- }
14393- return false;
14394- };
14395- [[maybe_unused]] bool NeedToExit;
14396- (void)AttemptCheckBitwidth(Checker, NeedToExit);
14397- BitWidth = BestBitWidth;
14398- return TryProcessInstruction(I, *ITE, BitWidth, Operands);
14399- }
14400-
1440114327 // Otherwise, conservatively give up.
1440214328 default:
1440314329 break;
0 commit comments