From 7b70fb9c7c1f77b4b6a6a864b2d4841d1fc4df9d Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Mon, 26 Aug 2024 13:57:49 +0100
Subject: [PATCH] [VPlan] Implement VPInterleaveRecipe::computeCost.

Implement computing costs for VPInterleaveRecipe.
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  4 +++
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 33 +++++++++++++++++--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 36a1aa08654d5..2ab0a1826bd21 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2165,6 +2165,10 @@ class VPInterleaveRecipe : public VPRecipeBase {
   /// Generate the wide load or store, and shuffles.
   void execute(VPTransformState &State) override;
 
+  /// Return the cost of this VPInterleaveRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fe1325f416300..b1fcc13dc3471 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -269,8 +269,6 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
 static Instruction *getInstructionForCost(const VPRecipeBase *R) {
   if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
     return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
-  if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
-    return IG->getInsertPos();
   if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
     return &WidenMem->getIngredient();
   return nullptr;
@@ -2627,6 +2625,37 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   }
 }
 
+InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
+                                                VPCostContext &Ctx) const {
+  Instruction *I = getInsertPos();
+  Type *ValTy = Ctx.Types.inferScalarType(
+      getNumDefinedValues() > 0 ? getVPValue(0) : getStoredValues()[0]);
+  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
+  unsigned AS = getLoadStoreAddressSpace(I);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+  unsigned InterleaveFactor = IG->getFactor();
+  auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+
+  // Holds the indices of existing members in the interleaved group.
+  SmallVector<unsigned, 4> Indices;
+  for (unsigned IF = 0; IF < InterleaveFactor; IF++)
+    if (IG->getMember(IF))
+      Indices.push_back(IF);
+
+  // Calculate the cost of the whole interleaved group.
+  InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
+      I->getOpcode(), WideVecTy, IG->getFactor(), Indices, IG->getAlign(), AS,
+      CostKind, getMask(), NeedsMaskForGaps);
+
+  if (!IG->isReverse())
+    return Cost;
+
+  return Cost + IG->getNumMembers() *
+                    Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+                                           VectorTy, std::nullopt, CostKind, 0);
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {