diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index f9eef60f77b7aca..5a92d6bab31a971 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1688,7 +1688,6 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Args, CxtI); - auto getConstantMatCost = [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost { if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand)) @@ -1760,8 +1759,14 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost( Op1Info, Op2Info, Args, CxtI); } - return ConstantMatCost + - LT.first * getRISCVInstructionCost(Op, LT.second, CostKind); + + InstructionCost InstrCost = getRISCVInstructionCost(Op, LT.second, CostKind); + // We use BasicTTIImpl to calculate scalar costs, which assumes floating point + // ops are twice as expensive as integer ops. Do the same for vectors so + // scalar floating point ops aren't cheaper than their vector equivalents. + if (Ty->isFPOrFPVectorTy()) + InstrCost *= 2; + return ConstantMatCost + LT.first * InstrCost; } // TODO: Deduplicate from TargetTransformInfoImplCRTPBase. diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll index d1e8bb015491e2b..5236f5a3bae9548 100644 --- a/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/arith-fp.ll @@ -8,36 +8,36 @@ define i32 @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = fadd <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fadd <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = fadd <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fadd <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fadd <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fadd <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fadd <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fadd <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fadd <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fadd <1 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fadd <2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fadd <32 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fadd <1 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fadd <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fadd <1 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fadd <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fadd half undef, undef @@ -88,36 +88,36 @@ define i32 @fsub() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = fsub <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fsub <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = fsub <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fsub <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fsub <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fsub undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fsub <1 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fsub <2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fsub <32 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fsub <1 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fsub <1 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fsub undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fsub undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fsub half undef, undef @@ -168,36 +168,36 @@ define i32 @fmul() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = fmul <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fmul <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fmul <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fmul <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fmul <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = fmul <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fmul <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fmul <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fmul <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fmul undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fmul <1 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fmul <2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fmul <32 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fmul <1 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fmul <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fmul <1 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fmul undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fmul undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fmul half undef, undef @@ -248,36 +248,36 @@ define i32 @fdiv() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = fdiv <1 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fdiv <2 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fdiv <4 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fdiv <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fdiv <16 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = fdiv <32 x half> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fdiv <1 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fdiv <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fdiv <1 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fdiv <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fdiv undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fdiv <1 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fdiv <2 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fdiv <32 x half> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fdiv <1 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fdiv <1 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fdiv undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fdiv undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fdiv half undef, undef @@ -408,36 +408,36 @@ define i32 @fneg() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = fneg <1 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fneg <4 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fneg <8 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32F16 = fneg <32 x half> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV4F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV8F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV16F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32F16 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = fneg <1 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fneg <4 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV2F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F32 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = fneg <1 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fneg <4 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %NXV1F64 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F64 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F64 = fneg undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = fneg <1 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32F16 = fneg <32 x half> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV4F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV8F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV16F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV32F16 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = fneg <1 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV2F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV4F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV8F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV16F32 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = fneg <1 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV4F64 = fneg undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NXV8F64 = fneg undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F16 = fneg half undef diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll index 87ffb23dcb88e2e..67c081ba5d3c696 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll @@ -21,7 +21,7 @@ define void @unsupported_fp_ops( %vec, i32 %extraarg) { define void @powi( %vec) { ; CHECK-LABEL: 'powi' -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'powi' @@ -1383,73 +1383,73 @@ define void @reduce_fadd() { define void @vp_fadd(){ ; CHECK-LABEL: 'vp_fadd' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = fadd <2 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = fadd <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t5 = fadd <8 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t7 = fadd <16 x float> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = fadd <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = fadd <4 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = fadd <8 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = fadd <16 x double> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call @llvm.vp.fadd.nxv2f32( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t19 = call @llvm.vp.fadd.nxv4f32( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t20 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t21 = call @llvm.vp.fadd.nxv8f32( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t22 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t23 = call @llvm.vp.fadd.nxv16f32( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t24 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call @llvm.vp.fadd.nxv2f64( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call @llvm.vp.fadd.nxv4f64( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call @llvm.vp.fadd.nxv8f64( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = fadd undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call @llvm.vp.fadd.nxv16f64( undef, undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = fadd <2 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t3 = fadd <4 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t5 = fadd <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t7 = fadd <16 x float> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t9 = fadd <2 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t11 = fadd <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13 = fadd <8 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15 = fadd <16 x double> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call @llvm.vp.fadd.nxv2f32( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t18 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = call @llvm.vp.fadd.nxv4f32( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call @llvm.vp.fadd.nxv8f32( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t23 = call @llvm.vp.fadd.nxv16f32( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t24 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t25 = call @llvm.vp.fadd.nxv2f64( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t27 = call @llvm.vp.fadd.nxv4f64( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t29 = call @llvm.vp.fadd.nxv8f64( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = fadd undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t31 = call @llvm.vp.fadd.nxv16f64( undef, undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'vp_fadd' -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = fadd <2 x float> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = fadd <4 x float> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t5 = fadd <8 x float> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t7 = fadd <16 x float> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = fadd <2 x double> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = fadd <4 x double> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = fadd <8 x double> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = fadd <16 x double> undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call @llvm.vp.fadd.nxv2f32( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t19 = call @llvm.vp.fadd.nxv4f32( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t20 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t21 = call @llvm.vp.fadd.nxv8f32( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t22 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t23 = call @llvm.vp.fadd.nxv16f32( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t24 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call @llvm.vp.fadd.nxv2f64( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call @llvm.vp.fadd.nxv4f64( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call @llvm.vp.fadd.nxv8f64( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = fadd undef, undef -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call @llvm.vp.fadd.nxv16f64( undef, undef, undef, i32 undef) -; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = fadd <2 x float> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t2 = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> undef, <4 x float> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t3 = fadd <4 x float> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t4 = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> undef, <8 x float> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t5 = fadd <8 x float> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t6 = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> undef, <16 x float> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t7 = fadd <16 x float> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> undef, <2 x double> undef, <2 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t9 = fadd <2 x double> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t10 = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t11 = fadd <4 x double> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t12 = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> undef, <8 x double> undef, <8 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13 = fadd <8 x double> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t14 = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> undef, <16 x double> undef, <16 x i1> undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15 = fadd <16 x double> undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t17 = call @llvm.vp.fadd.nxv2f32( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t18 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t19 = call @llvm.vp.fadd.nxv4f32( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t20 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t21 = call @llvm.vp.fadd.nxv8f32( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t22 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t23 = call @llvm.vp.fadd.nxv16f32( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t24 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t25 = call @llvm.vp.fadd.nxv2f64( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t27 = call @llvm.vp.fadd.nxv4f64( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t29 = call @llvm.vp.fadd.nxv8f64( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = fadd undef, undef +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t31 = call @llvm.vp.fadd.nxv16f64( undef, undef, undef, i32 undef) +; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t32 = fadd undef, undef ; TYPEBASED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t0 = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> undef, <2 x float> undef, <2 x i1> undef, i32 undef) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index e50d7362365b813..a151232df0cd58e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -39,32 +39,32 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; RV32-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; RV32-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; RV32-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; RV32-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; RV32-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] -; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV32-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV32-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV32-NEXT: [[TMP8:%.*]] = add [[TMP7]], zeroinitializer +; RV32-NEXT: [[TMP9:%.*]] = mul [[TMP8]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] +; RV32-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; RV32-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP11]] +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV32-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] -; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 -; RV32-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV32-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] -; RV32-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV32-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META0:![0-9]+]] +; RV32-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP15:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP15]] +; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP16]], i32 8, [[TMP14]], poison), !alias.scope [[META3:![0-9]+]] +; RV32-NEXT: [[TMP17:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV32-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] +; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -121,32 +121,32 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 -; RV64-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; RV64-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer -; RV64-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] -; RV64-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; RV64-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] -; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV64-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV64-NEXT: [[TMP7:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV64-NEXT: [[TMP8:%.*]] = add [[TMP7]], zeroinitializer +; RV64-NEXT: [[TMP9:%.*]] = mul [[TMP8]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] +; RV64-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; RV64-NEXT: [[TMP12:%.*]] = mul i64 16, [[TMP11]] +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 ; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV64-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] -; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 -; RV64-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV64-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] -; RV64-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV64-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP13]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope [[META0:![0-9]+]] +; RV64-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP15:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP15]] +; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP16]], i32 8, [[TMP14]], poison), !alias.scope [[META3:![0-9]+]] +; RV64-NEXT: [[TMP17:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV64-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] +; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP18]], [[TMP19]], i32 8, [[TMP14]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 8e9713fecf29dbb..fc310f416308214 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -54,43 +54,46 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Loop does not require scalar epilogue ; CHECK-NEXT: LV: Loop does not require scalar epilogue ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count -; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count -; CHECK: ir-bb: -; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: vp<%2> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors -; CHECK: vector.ph: +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop -; CHECK: vector loop: { +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> -; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> -; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> -; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%5>, ir<-1> +; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> +; CHECK-NEXT: vp<%6> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: WIDEN ir<%1> = load vp<%6> +; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> +; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> +; CHECK-NEXT: vp<%7> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: WIDEN store vp<%7>, ir<%add9> +; CHECK-NEXT: EMIT vp<%8> = add nuw vp<%3>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK: middle.block: -; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%10> = icmp eq vp<%2>, vp<%1> +; CHECK-NEXT: EMIT branch-on-cond vp<%10> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb +; CHECK-NEXT: ir-bb: ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph +; CHECK-NEXT: scalar.ph: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] @@ -134,7 +137,52 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop ; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 -; CHECK: LV: Interleaving disabled by the pass manager +; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: vp<%2> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%5>, ir<-1> +; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> +; CHECK-NEXT: vp<%6> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: WIDEN ir<%13> = load vp<%6> +; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1> +; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> +; CHECK-NEXT: vp<%7> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: WIDEN store vp<%7>, ir<%add9> +; CHECK-NEXT: EMIT vp<%8> = add nuw vp<%3>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%10> = icmp eq vp<%2>, vp<%1> +; CHECK-NEXT: EMIT branch-on-cond vp<%10> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: LV: Loop does not require scalar epilogue +; CHECK-NEXT: LV: Loop does not require scalar epilogue +; CHECK-NEXT: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Loop does not require scalar epilogue ; CHECK-NEXT: LV: Vectorizing: innermost loop. ; CHECK-EMPTY: @@ -193,7 +241,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom ; CHECK-NEXT: LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 -; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 +; CHECK-NEXT: LV: Found an estimated cost of 4 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom ; CHECK-NEXT: LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 @@ -210,37 +258,40 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Loop does not require scalar epilogue ; CHECK-NEXT: LV: Loop does not require scalar epilogue ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { -; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF -; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count -; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count -; CHECK: ir-bb: -; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: vp<%2> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) ; CHECK-NEXT: No successors -; CHECK: vector.ph: +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop -; CHECK: vector loop: { +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> -; CHECK-NEXT: vp<[[STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> -; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> -; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]> -; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> -; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> -; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1> -; CHECK-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> -; CHECK-NEXT: EMIT branch-on-count vp<[[IV_INC]]>, vp<[[VEC_TC]]> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%5>, ir<-1> +; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> +; CHECK-NEXT: vp<%6> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: WIDEN ir<%1> = load vp<%6> +; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> +; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> +; CHECK-NEXT: vp<%7> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: WIDEN store vp<%7>, ir<%conv1> +; CHECK-NEXT: EMIT vp<%8> = add nuw vp<%3>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK: middle.block: -; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%10> = icmp eq vp<%2>, vp<%1> +; CHECK-NEXT: EMIT branch-on-cond vp<%10> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: @@ -255,7 +306,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom ; CHECK-NEXT: LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4 -; CHECK-NEXT: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 +; CHECK-NEXT: LV: Found an estimated cost of 4 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00 ; CHECK-NEXT: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom ; CHECK-NEXT: LV: Found an estimated cost of 13 for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4 ; CHECK-NEXT: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1 @@ -281,7 +332,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: The target has 31 registers of RISCV::GPRRC register class ; CHECK-NEXT: LV: The target has 32 registers of RISCV::VRRC register class ; CHECK-NEXT: LV: Loop does not require scalar epilogue -; CHECK-NEXT: LV: Loop cost is 32 +; CHECK-NEXT: LV: Loop cost is 34 ; CHECK-NEXT: LV: IC is 1 ; CHECK-NEXT: LV: VF is vscale x 4 ; CHECK-NEXT: LV: Not Interleaving. @@ -290,7 +341,52 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop ; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 -; CHECK: LV: Interleaving disabled by the pass manager +; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: vp<%2> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> +; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%5>, ir<-1> +; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> +; CHECK-NEXT: vp<%6> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: WIDEN ir<%13> = load vp<%6> +; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%13>, ir<1.000000e+00> +; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> +; CHECK-NEXT: vp<%7> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: WIDEN store vp<%7>, ir<%conv1> +; CHECK-NEXT: EMIT vp<%8> = add nuw vp<%3>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<%10> = icmp eq vp<%2>, vp<%1> +; CHECK-NEXT: EMIT branch-on-cond vp<%10> +; CHECK-NEXT: Successor(s): ir-bb, scalar.ph +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: scalar.ph: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: LV: Loop does not require scalar epilogue +; CHECK-NEXT: LV: Loop does not require scalar epilogue +; CHECK-NEXT: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Loop does not require scalar epilogue ; CHECK-NEXT: LV: Vectorizing: innermost loop. ; diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll index 03fbb5e5a4674ce..97608174b524d75 100644 --- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll @@ -1130,14 +1130,23 @@ define @umax_nxv1i64_anymask( %x, i64 %y, < } define @fadd_nxv1f32_allonesmask( %x, float %y, i32 zeroext %evl) { -; ALL-LABEL: @fadd_nxv1f32_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.fadd.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @fadd_nxv1f32_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = fadd float [[Y:%.*]], 4.200000e+01 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @fadd_nxv1f32_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fadd.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -1164,14 +1173,23 @@ define @fadd_nxv1f32_anymask( %x, float } define @fsub_nxv1f32_allonesmask( %x, float %y, i32 zeroext %evl) { -; ALL-LABEL: @fsub_nxv1f32_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.fsub.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @fsub_nxv1f32_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = fsub float [[Y:%.*]], 4.200000e+01 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @fsub_nxv1f32_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fsub.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -1198,14 +1216,23 @@ define @fsub_nxv1f32_anymask( %x, float } define @fdiv_nxv1f32_allonesmask( %x, float %y, i32 zeroext %evl) { -; ALL-LABEL: @fdiv_nxv1f32_allonesmask( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.fdiv.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @fdiv_nxv1f32_allonesmask( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP2]], [[MASK]], i32 [[EVL:%.*]]) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @fdiv_nxv1f32_allonesmask( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fdiv.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 [[EVL:%.*]]) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 [[EVL]]) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer @@ -1275,14 +1302,23 @@ define @frem_nxv1f32_allonesmask( %x, f } define @fdiv_nxv1f32_allonesmask_knownvl( %x, float %y) { -; ALL-LABEL: @fdiv_nxv1f32_allonesmask_knownvl( -; ALL-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 -; ALL-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer -; ALL-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 -; ALL-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer -; ALL-NEXT: [[TMP3:%.*]] = call @llvm.vp.fdiv.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 4) -; ALL-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 4) -; ALL-NEXT: ret [[TMP4]] +; VEC-COMBINE-LABEL: @fdiv_nxv1f32_allonesmask_knownvl( +; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP1:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01 +; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, float [[TMP1]], i64 0 +; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP2]], [[MASK]], i32 4) +; VEC-COMBINE-NEXT: ret [[TMP3]] +; +; NO-VEC-COMBINE-LABEL: @fdiv_nxv1f32_allonesmask_knownvl( +; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement poison, i1 true, i32 0 +; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector [[SPLAT]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement poison, float [[Y:%.*]], i64 0 +; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer +; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call @llvm.vp.fdiv.nxv1f32( [[TMP2]], shufflevector ( insertelement ( poison, float 4.200000e+01, i64 0), poison, zeroinitializer), [[MASK]], i32 4) +; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call @llvm.vp.fadd.nxv1f32( [[X:%.*]], [[TMP3]], [[MASK]], i32 4) +; NO-VEC-COMBINE-NEXT: ret [[TMP4]] ; %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer