From 30656cb12b95348860a75cd6dbcbcdbdc3ccb462 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 2 Jul 2024 21:04:34 +0200 Subject: [PATCH 1/2] TTI: Check legalization cost of min/max ISD nodes Instead of counting the cost of the assumed expansion. The AMDGPU costs for the i64 case look too high to me. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 22 +- .../Analysis/CostModel/AArch64/min-max.ll | 32 +-- .../CostModel/AMDGPU/arith-sminmax.ll | 228 +++++++----------- .../CostModel/AMDGPU/arith-uminmax.ll | 228 +++++++----------- llvm/test/Analysis/CostModel/ARM/cmps.ll | 12 +- .../Analysis/CostModel/ARM/reduce-smax.ll | 78 +++--- .../Analysis/CostModel/ARM/reduce-smin.ll | 78 +++--- .../Analysis/CostModel/ARM/reduce-umax.ll | 78 +++--- .../Analysis/CostModel/ARM/reduce-umin.ll | 78 +++--- .../SLPVectorizer/AMDGPU/min_max.ll | 76 ++++-- 10 files changed, 426 insertions(+), 484 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 93086d4ac2c8d..bf6f88a88365d 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2135,21 +2135,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } case Intrinsic::smax: + ISD = ISD::SMAX; + break; case Intrinsic::smin: + ISD = ISD::SMIN; + break; case Intrinsic::umax: - case Intrinsic::umin: { - // minmax(X,Y) = select(icmp(X,Y),X,Y) - Type *CondTy = RetTy->getWithNewBitWidth(1); - bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin; - CmpInst::Predicate Pred = - IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - return Cost; - } + ISD = ISD::UMAX; + break; + case Intrinsic::umin: + ISD = ISD::UMIN; + break; case Intrinsic::sadd_sat: ISD = ISD::SADDSAT; break; diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll index 013d4876c9f37..2f18918d27efe 100644 --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -6,10 +6,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @umin() { ; CHECK-LABEL: 'umin' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) @@ -53,10 +53,10 @@ define void @umin() { define void @umax() { ; CHECK-LABEL: 'umax' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umax.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umax.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) @@ -100,10 +100,10 @@ define void @umax() { define void @smin() { ; CHECK-LABEL: 'smin' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) @@ -147,10 +147,10 @@ define void @smin() { define void @smax() { ; CHECK-LABEL: 'smax' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll index c73b8b716bc54..900b1852b3c1d 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll @@ -38,79 +38,54 @@ declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>) define i32 @smax(i32 %arg) { ; FAST-LABEL: 'smax' -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'smax' -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef -; -; ALL-SIZE-LABEL: 'smax' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -169,79 +144,54 @@ declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>) define i32 @smin(i32 %arg) { ; FAST-LABEL: 'smin' -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'smin' -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef -; -; ALL-SIZE-LABEL: 'smin' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -271,3 +221,5 @@ define i32 @smin(i32 %arg) { ret i32 undef } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ALL-SIZE: {{.*}} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll index 68687743d75b6..f2080968617e9 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll @@ -38,79 +38,54 @@ declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>) define i32 @umax(i32 %arg) { ; FAST-LABEL: 'umax' -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'umax' -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef -; -; ALL-SIZE-LABEL: 'umax' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -169,79 +144,54 @@ declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>) define i32 @umin(i32 %arg) { ; FAST-LABEL: 'umin' -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'umin' -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef -; -; ALL-SIZE-LABEL: 'umin' -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) @@ -271,3 +221,5 @@ define i32 @umin(i32 %arg) { ret i32 undef } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ALL-SIZE: {{.*}} diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll index 184b7076d02be..7e59183608dfb 100644 --- a/llvm/test/Analysis/CostModel/ARM/cmps.ll +++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll @@ -197,7 +197,7 @@ define void @minmax() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -212,7 +212,7 @@ define void @minmax() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -227,7 +227,7 @@ define void @minmax() { ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -257,7 +257,7 @@ define void @minmax() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -272,7 +272,7 @@ define void @minmax() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -287,7 +287,7 @@ define void @minmax() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll index 4ef35f1547896..f1eccc120c572 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) @@ -40,15 +40,15 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -73,17 +73,17 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -110,19 +110,19 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll index f8bce922abcd0..818bf4d2b6735 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) @@ -40,15 +40,15 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -73,17 +73,17 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -110,19 +110,19 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll index 24f76e60cd310..c9a67ccdfb859 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) @@ -40,15 +40,15 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -73,17 +73,17 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -110,19 +110,19 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll index ee7025cf5db25..79a11f7d9bf6b 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) @@ -40,15 +40,15 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -73,17 +73,17 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -110,19 +110,19 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' -; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll index f44705a925d5a..46c6c10125b95 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll @@ -18,8 +18,15 @@ define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v2i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) -; GFX8-NEXT: ret <2 x i16> [[TMP0]] +; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX8-NEXT: ret <2 x i16> [[INS_1]] ; ; GFX9-LABEL: @uadd_sat_v2i16( ; GFX9-NEXT: bb: @@ -53,8 +60,15 @@ define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; ; GFX8-LABEL: @usub_sat_v2i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) -; GFX8-NEXT: ret <2 x i16> [[TMP0]] +; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX8-NEXT: ret <2 x i16> [[INS_1]] ; ; GFX9-LABEL: @usub_sat_v2i16( ; GFX9-NEXT: bb: @@ -88,8 +102,15 @@ define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; ; GFX8-LABEL: @sadd_sat_v2i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) -; GFX8-NEXT: ret <2 x i16> [[TMP0]] +; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX8-NEXT: ret <2 x i16> [[INS_1]] ; ; GFX9-LABEL: @sadd_sat_v2i16( ; GFX9-NEXT: bb: @@ -123,8 +144,15 @@ define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; ; GFX8-LABEL: @ssub_sat_v2i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]]) -; GFX8-NEXT: ret <2 x i16> [[TMP0]] +; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0 +; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1 +; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0 +; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1 +; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX8-NEXT: ret <2 x i16> [[INS_1]] ; ; GFX9-LABEL: @ssub_sat_v2i16( ; GFX9-NEXT: bb: @@ -262,11 +290,18 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v3i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2 -; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2 -; GFX8-NEXT: [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]]) +; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0 +; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1 +; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2 +; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0 +; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1 +; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2 +; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2 +; GFX8-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX8-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2 ; GFX8-NEXT: ret <3 x i16> [[INS_2]] ; ; GFX9-LABEL: @uadd_sat_v3i16( @@ -317,11 +352,18 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; ; GFX8-LABEL: @uadd_sat_v4i16( ; GFX8-NEXT: bb: -; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]]) -; GFX8-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]]) -; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> -; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> +; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0 +; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1 +; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0 +; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1 +; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]]) +; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]]) +; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]]) +; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> +; GFX8-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0 +; GFX8-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1 +; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[INS_1]], <4 x i16> [[TMP2]], <4 x i32> ; GFX8-NEXT: ret <4 x i16> [[INS_31]] ; ; GFX9-LABEL: @uadd_sat_v4i16( From 7d02cdf6752c89812338c02d4b508f61ac20a48f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:06:05 +0400 Subject: [PATCH 2/2] Preserve default expansion logic --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 16 +++++ .../Analysis/CostModel/AArch64/min-max.ll | 32 ++++----- .../CostModel/AMDGPU/arith-sminmax.ll | 64 +++++++++--------- .../CostModel/AMDGPU/arith-uminmax.ll | 64 +++++++++--------- llvm/test/Analysis/CostModel/ARM/cmps.ll | 8 +-- .../Analysis/CostModel/ARM/reduce-smax.ll | 66 +++++++++---------- .../Analysis/CostModel/ARM/reduce-smin.ll | 66 +++++++++---------- .../Analysis/CostModel/ARM/reduce-umax.ll | 66 +++++++++---------- .../Analysis/CostModel/ARM/reduce-umin.ll | 66 +++++++++---------- 9 files changed, 232 insertions(+), 216 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index bf6f88a88365d..c0ee50db37e67 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2341,6 +2341,22 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) + thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind); } + case Intrinsic::smin: + case Intrinsic::smax: + case Intrinsic::umin: + case Intrinsic::umax: { + // minmax(X,Y) = select(icmp(X,Y),X,Y) + Type *CondTy = RetTy->getWithNewBitWidth(1); + bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin; + CmpInst::Predicate Pred = + IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + return Cost; + } case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { // Assume a default expansion. diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll index 2f18918d27efe..013d4876c9f37 100644 --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -6,10 +6,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @umin() { ; CHECK-LABEL: 'umin' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umin.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umin.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) @@ -53,10 +53,10 @@ define void @umin() { define void @umax() { ; CHECK-LABEL: 'umax' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.umax.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.umax.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) @@ -100,10 +100,10 @@ define void @umax() { define void @smin() { ; CHECK-LABEL: 'smin' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smin.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) @@ -147,10 +147,10 @@ define void @smin() { define void @smax() { ; CHECK-LABEL: 'smax' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call <3 x i8> @llvm.smax.v3i8(<3 x i8> undef, <3 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll index 900b1852b3c1d..8afb26722cc16 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll @@ -38,15 +38,15 @@ declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>) define i32 @smax(i32 %arg) { ; FAST-LABEL: 'smax' -; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) @@ -63,15 +63,15 @@ define i32 @smax(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'smax' -; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) @@ -144,15 +144,15 @@ declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>) define i32 @smin(i32 %arg) { ; FAST-LABEL: 'smin' -; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) @@ -169,15 +169,15 @@ define i32 @smin(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'smin' -; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll index f2080968617e9..2b3728b556d9e 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll @@ -38,15 +38,15 @@ declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>) define i32 @umax(i32 %arg) { ; FAST-LABEL: 'umax' -; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) @@ -63,15 +63,15 @@ define i32 @umax(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'umax' -; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) @@ -144,15 +144,15 @@ declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>) define i32 @umin(i32 %arg) { ; FAST-LABEL: 'umin' -; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) -; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) ; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) @@ -169,15 +169,15 @@ define i32 @umin(i32 %arg) { ; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef ; ; SLOW-LABEL: 'umin' -; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) ; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll index 7e59183608dfb..e86a1b95a7b36 100644 --- a/llvm/test/Analysis/CostModel/ARM/cmps.ll +++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll @@ -197,7 +197,7 @@ define void @minmax() { ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -212,7 +212,7 @@ define void @minmax() { ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -257,7 +257,7 @@ define void @minmax() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef @@ -272,7 +272,7 @@ define void @minmax() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll index f1eccc120c572..7dcab51e0a1cf 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) @@ -40,11 +40,11 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) @@ -73,12 +73,12 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) @@ -110,13 +110,13 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll index 818bf4d2b6735..617c6b4605189 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) @@ -40,11 +40,11 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) @@ -73,12 +73,12 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) @@ -110,13 +110,13 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll index c9a67ccdfb859..764034d18bee0 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) @@ -40,11 +40,11 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) @@ -73,12 +73,12 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) @@ -110,13 +110,13 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll index 79a11f7d9bf6b..b5431f63bdca9 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll @@ -8,26 +8,26 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" define i32 @reduce_i64(i32 %arg) { ; V8M-LABEL: 'reduce_i64' ; V8M-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i64' ; NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 403 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) @@ -40,11 +40,11 @@ define i32 @reduce_i64(i32 %arg) { define i32 @reduce_i32(i32 %arg) { ; V8M-LABEL: 'reduce_i32' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i32' @@ -56,7 +56,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) @@ -73,12 +73,12 @@ define i32 @reduce_i32(i32 %arg) { define i32 @reduce_i16(i32 %arg) { ; V8M-LABEL: 'reduce_i16' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i16' @@ -91,7 +91,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) @@ -110,13 +110,13 @@ define i32 @reduce_i16(i32 %arg) { define i32 @reduce_i8(i32 %arg) { ; V8M-LABEL: 'reduce_i8' -; V8M-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 226 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 946 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) -; V8M-NEXT: Cost Model: Found an estimated cost of 1906 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 187 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 379 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; V8M-NEXT: Cost Model: Found an estimated cost of 763 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) ; V8M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-LABEL: 'reduce_i8' @@ -130,7 +130,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)