From 24e08a345e15d31eae88c28b1a9ddcdc77d6a100 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Feb 2025 21:15:46 +0000 Subject: [PATCH 1/4] [LV] Add test checking costs of some VPInstructions. --- .../X86/CostModel/vpinstruction-cost.ll | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll new file mode 100644 index 0000000000000..25737bbb0096a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of" +; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" + +define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) { +; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction' +; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1 +; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] +; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 +; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> +; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> +; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src> +; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5> +; CHECK: Cost of 1 for VF 2: WIDEN 
ir<%iv.4> = add ir<%iv>, ir<4> +; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128> +; CHECK: Cost of 0 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1> +; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or> +; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst> +; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c> +; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> +; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK: Cost of 0 for VF 2: vector loop backedge +; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1 +; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] +; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 +; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next> +; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0> +; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4> +; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src> +; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5> +; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4> +; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128> +; CHECK: Cost of 0 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1> +; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or> +; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst> +; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c> +; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1> +; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2> +; CHECK: Cost of 0 for VF 4: vector loop backedge +; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add 
nuw nsw i64 %iv, 1 +; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] +; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32 +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %g.src = getelementptr inbounds i64, ptr %src, i64 %iv + %l = load i64, ptr %g.src + %iv.4 = add nuw nsw i64 %iv, 4 + %c = icmp ule i64 %l, 128 + br i1 %c, label %loop.then, label %loop.latch + +loop.then: + %or = or disjoint i64 %iv.4, 1 + %g.dst = getelementptr inbounds i64, ptr %dst, i64 %or + store i64 %iv.4, ptr %g.dst, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 32 + br i1 %exitcond, label %exit, label %loop.header + +exit: + ret void +} From fddabf6128721affb2b3b89a5ac15897b4dc2f66 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Feb 2025 21:17:51 +0000 Subject: [PATCH 2/4] [VPlan] Compute cost for binary op VPInstruction with underlying values. 
--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 21 +++++++++++++++++++ .../X86/CostModel/vpinstruction-cost.ll | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b734ddfce788e..f7f547075e1e0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -782,6 +782,27 @@ void VPInstruction::execute(VPTransformState &State) { /*IsScalar*/ GeneratesPerFirstLaneOnly); } +InstructionCost VPInstruction::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (Instruction::isBinaryOp(getOpcode())) { + if (!getUnderlyingValue()) + return 0; + + assert(!doesGeneratePerAllLanes() && + "Should only generate a vector value or single scalar, not scalars " + "for all lanes."); + Type *ResTy = Ctx.Types.inferScalarType(this); + if (!vputils::onlyFirstLaneUsed(this)) + ResTy = toVectorTy(ResTy, VF); + + return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); + } + + assert(!getUnderlyingValue() && + "unexpected VPInstruction without underlying value"); + return 0; +} + bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { if (Instruction::isBinaryOp(getOpcode())) return false; diff --git a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll index 25737bbb0096a..bb85b88f181f7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/CostModel/vpinstruction-cost.ll @@ -18,7 +18,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5> ; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4> ; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128> -; CHECK: Cost of 0 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1> 
+; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1> ; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or> ; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst> ; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c> @@ -36,7 +36,7 @@ define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) ; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5> ; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4> ; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128> -; CHECK: Cost of 0 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1> +; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1> ; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or> ; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst> ; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c> From 9a955b1baa9e2fa3fb677f26c00f6fde6bccc6a0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 Feb 2025 21:52:36 +0000 Subject: [PATCH 3/4] !fixup address latest comments, thanks! --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f7f547075e1e0..26145000b5ae2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -785,8 +785,11 @@ void VPInstruction::execute(VPTransformState &State) { InstructionCost VPInstruction::computeCost(ElementCount VF, VPCostContext &Ctx) const { if (Instruction::isBinaryOp(getOpcode())) { - if (!getUnderlyingValue()) + if (!getUnderlyingValue()) { + // TODO: Compute cost for VPInstructions without underlying values once + // the legacy cost model has been retired. 
return 0; + } assert(!doesGeneratePerAllLanes() && "Should only generate a vector value or single scalar, not scalars " @@ -798,8 +801,10 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); } + // TODO: Compute cost for other VPInstructions once the legacy cost model + // has been retired. assert(!getUnderlyingValue() && - "unexpected VPInstruction without underlying value"); + "unexpected VPInstruction with underlying value"); return 0; } From e37c2ad55ab9f5821f087bb618f9659014e1d56a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 6 Feb 2025 19:43:49 +0000 Subject: [PATCH 4/4] !fixup update after rebase. --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 48 ++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 26145000b5ae2..eb1aac4523752 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -713,6 +713,23 @@ Value *VPInstruction::generate(VPTransformState &State) { InstructionCost VPInstruction::computeCost(ElementCount VF, VPCostContext &Ctx) const { + if (Instruction::isBinaryOp(getOpcode())) { + if (!getUnderlyingValue()) { + // TODO: Compute cost for VPInstructions without underlying values once + // the legacy cost model has been retired.
+ return 0; + } + + assert(!doesGeneratePerAllLanes() && + "Should only generate a vector value or single scalar, not scalars " + "for all lanes."); + Type *ResTy = Ctx.Types.inferScalarType(this); + if (!vputils::onlyFirstLaneUsed(this)) + ResTy = toVectorTy(ResTy, VF); + + return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); + } + switch (getOpcode()) { case VPInstruction::AnyOf: { auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF); @@ -720,7 +737,10 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind); } default: - // TODO: Fill out other opcodes! + // TODO: Compute cost for other VPInstructions once the legacy cost model + // has been retired. + assert(!getUnderlyingValue() && + "unexpected VPInstruction with underlying value"); return 0; } } @@ -782,32 +802,6 @@ void VPInstruction::execute(VPTransformState &State) { /*IsScalar*/ GeneratesPerFirstLaneOnly); } -InstructionCost VPInstruction::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - if (Instruction::isBinaryOp(getOpcode())) { - if (!getUnderlyingValue()) { - // TODO: Compute cost for VPInstructions without underlying values once - // the legacy cost model has been retired. - return 0; - } - - assert(!doesGeneratePerAllLanes() && - "Should only generate a vector value or single scalar, not scalars " - "for all lanes."); - Type *ResTy = Ctx.Types.inferScalarType(this); - if (!vputils::onlyFirstLaneUsed(this)) - ResTy = toVectorTy(ResTy, VF); - - return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); - } - - // TODO: Compute cost for other VPInstructions once the legacy cost model - // has been retired. - assert(!getUnderlyingValue() && - "unexpected VPInstruction with underlying value"); - return 0; -} - bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { if (Instruction::isBinaryOp(getOpcode())) return false;