diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ae39217dc8ff8..828f5c5dee23f 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -22841,6 +22841,53 @@ Examples:
   llvm.experimental.vp.splice(, , -2, 3, 2); ==> trailing elements
 
+.. _int_experimental_vp_splat:
+
+
+'``llvm.experimental.vp.splat``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x double> @llvm.experimental.vp.splat.v2f64(double %scalar, <2 x i1> %mask, i32 %evl)
+      declare <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 %scalar, <vscale x 4 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.splat.*``' intrinsic creates a predicated splat
+with a specific effective vector length.
+
+Arguments:
+""""""""""
+
+The first argument is the scalar value that is splatted into the result
+vector. The second argument ``mask`` is a vector mask and has the same number
+of elements as the result. The third argument is the explicit vector length of
+the operation.
+
+Semantics:
+""""""""""
+
+This intrinsic splats the scalar argument across the first ``evl`` lanes of
+the result vector. The lanes in the result vector disabled by ``mask`` are
+``poison``. The elements past ``evl`` are ``poison``.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %a, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+      %e = insertelement <4 x float> poison, float %a, i32 0
+      %s = shufflevector <4 x float> %e, <4 x float> poison, <4 x i32> zeroinitializer
+      %also.r = select <4 x i1> %mask, <4 x float> %s, <4 x float> poison
+
+
 .. _int_experimental_vp_reverse:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 65a9b68b5229d..0be7e963954ef 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2319,6 +2319,13 @@ def int_experimental_vp_reverse:
                              llvm_i32_ty],
                             [IntrNoMem]>;
 
+def int_experimental_vp_splat:
+  DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                        [LLVMVectorElementType<0>,
+                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                         llvm_i32_ty],
+                        [IntrNoMem]>;
+
 def int_vp_is_fpclass:
       DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                             [ llvm_anyvector_ty,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 8eced073501e8..a4a1000d37259 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -777,6 +777,13 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
 
 ///// } Shuffles
 
+// llvm.vp.splat(val,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_splat, 1, 2)
+BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_SPLAT, -1, experimental_vp_splat, 1, 2)
+VP_PROPERTY_NO_FUNCTIONAL
+HELPER_MAP_VPID_TO_VPSD(experimental_vp_splat, EXPERIMENTAL_VP_SPLAT)
+END_REGISTER_VP(experimental_vp_splat, EXPERIMENTAL_VP_SPLAT)
+
 #undef BEGIN_REGISTER_VP
 #undef BEGIN_REGISTER_VP_INTRINSIC
 #undef BEGIN_REGISTER_VP_SDNODE
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index fed5ebcc3c903..74a143f63dbbe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -137,6 +137,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
     break;
   case ISD::SPLAT_VECTOR:
   case ISD::SCALAR_TO_VECTOR:
+  case ISD::EXPERIMENTAL_VP_SPLAT:
     Res = PromoteIntRes_ScalarOp(N);
break; case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break; @@ -1916,6 +1917,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::EXPERIMENTAL_VP_SPLAT: Res = PromoteIntOp_ScalarOp(N); break; case ISD::VSELECT: @@ -2211,10 +2213,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, } SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT) + return SDValue( + DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0); + // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated, // so just promote the operand in place. - return SDValue(DAG.UpdateNodeOperands(N, - GetPromotedInteger(N->getOperand(0))), 0); + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { @@ -5231,6 +5237,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::EXPERIMENTAL_VP_SPLAT: case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; @@ -5859,6 +5866,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) { EVT NOutElemVT = NOutVT.getVectorElementType(); SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0)); + if (N->isVPOpcode()) + return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1), + N->getOperand(2)); return DAG.getNode(N->getOpcode(), dl, NOutVT, Op); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 85f947efe2c75..f20cfe6de60cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -915,6 +915,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi, bool SplitSETCC = false); void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -1052,6 +1053,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bbf08e862da12..5015a665b1eb6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1076,6 +1076,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break; case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, 
Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: SplitVecRes_ScalarOp(N, Lo, Hi); @@ -1992,6 +1993,16 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, } } +void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); + auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1)); + auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi); +} + void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); @@ -4284,6 +4295,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: + case ISD::EXPERIMENTAL_VP_SPLAT: Res = WidenVecRes_ScalarOp(N); break; case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; @@ -5814,6 +5826,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->isVPOpcode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0)); } @@ -6353,6 +6368,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { Res = WidenVecOp_FP_TO_XINT_SAT(N); break; + case ISD::EXPERIMENTAL_VP_SPLAT: + Res = WidenVecOp_VP_SPLAT(N, OpNo); + break; + case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -6813,6 +6832,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { report_fatal_error("Unable to widen vector store"); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only mask operand of vp_splat"); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), GetWidenedVector(N->getOperand(1)), + N->getOperand(2)); +} + SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of vp_store"); diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index e17755c8ad57b..64a14da55b15e 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -699,6 +699,9 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, VPFunc = Intrinsic::getDeclaration( M, VPID, {Params[0]->getType(), Params[1]->getType()}); break; + case Intrinsic::experimental_vp_splat: + VPFunc = Intrinsic::getDeclaration(M, VPID, ReturnType); + break; } assert(VPFunc && "Could not declare VP intrinsic"); return VPFunc; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 19f958ccfd2e1..98d9b4286b708 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -705,7 +705,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX, ISD::VP_ABS, 
ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT, - ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}; + ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF, + ISD::EXPERIMENTAL_VP_SPLAT}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, @@ -721,7 +722,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT, ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM, - ISD::VP_REDUCE_FMAXIMUM}; + ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT}; static const unsigned IntegerVecReduceOps[] = { ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, @@ -7268,6 +7269,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerVPSpliceExperimental(Op, DAG); case ISD::EXPERIMENTAL_VP_REVERSE: return lowerVPReverseExperimental(Op, DAG); + case ISD::EXPERIMENTAL_VP_SPLAT: + return lowerVPSplatExperimental(Op, DAG); case ISD::CLEAR_CACHE: { assert(getTargetMachine().getTargetTriple().isOSLinux() && "llvm.clear_cache only needs custom lower on Linux targets"); @@ -11630,6 +11633,29 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, return convertFromScalableVector(VT, Result, DAG, Subtarget); } +SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Val = Op.getOperand(0); + SDValue Mask = Op.getOperand(1); + SDValue VL = Op.getOperand(2); + MVT VT = Op.getSimpleValueType(); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(VT); + MVT MaskVT = getMaskTypeFor(ContainerVT); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } + + SDValue Result = + lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget); + + if (!VT.isFixedLengthVector()) + return Result; + return convertFromScalableVector(VT, Result, DAG, Subtarget); +} + SDValue RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 7d8bceb5cb341..449ff24492c69 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -972,6 +972,7 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll new file mode 100644 index 0000000000000..2913cbdf0fffd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll @@ -0,0 +1,452 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + 
+define <1 x i8> @vp_splat_v1i8(i8 %val, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <1 x i8> @llvm.experimental.vp.splat.v1i8(i8 %val, <1 x i1> %m, i32 %evl) + ret <1 x i8> %splat +} + +define <2 x i8> @vp_splat_v2i8(i8 %val, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 %val, <2 x i1> %m, i32 %evl) + ret <2 x i8> %splat +} + +define <4 x i8> @vp_splat_v4i8(i8 %val, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 %val, <4 x i1> %m, i32 %evl) + ret <4 x i8> %splat +} + +define <8 x i8> @vp_splat_v8i8(i8 %val, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 %val, <8 x i1> %m, i32 %evl) + ret <8 x i8> %splat +} + +define <16 x i8> @vp_splat_v16i8(i8 %val, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 %val, <16 x i1> %m, i32 %evl) + ret <16 x i8> %splat +} + +define <32 x i8> @vp_splat_v32i8(i8 %val, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <32 x i8> @llvm.experimental.vp.splat.v32i8(i8 %val, <32 x i1> %m, i32 %evl) + ret <32 x i8> %splat +} + +define <64 x i8> @vp_splat_v64i8(i8 %val, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <64 x i8> @llvm.experimental.vp.splat.v64i8(i8 %val, <64 x i1> %m, i32 %evl) + ret <64 x i8> %splat +} + +define <1 x i16> @vp_splat_v1i16(i16 %val, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <1 x i16> @llvm.experimental.vp.splat.v1i16(i16 %val, <1 x i1> %m, i32 %evl) + ret <1 x i16> %splat +} + +define <2 x i16> @vp_splat_v2i16(i16 %val, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 %val, <2 x i1> %m, i32 %evl) + ret <2 x i16> %splat +} + +define <4 x i16> @vp_splat_v4i16(i16 %val, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 %val, <4 x i1> %m, i32 %evl) + ret <4 x i16> %splat +} + +define <8 x i16> @vp_splat_v8i16(i16 %val, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v8i16: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 %val, <8 x i1> %m, i32 %evl) + ret <8 x i16> %splat +} + +define <16 x i16> @vp_splat_v16i16(i16 %val, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 %val, <16 x i1> %m, i32 %evl) + ret <16 x i16> %splat +} + +define <32 x i16> @vp_splat_v32i16(i16 %val, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <32 x i16> @llvm.experimental.vp.splat.v32i16(i16 %val, <32 x i1> %m, i32 %evl) + ret <32 x i16> %splat +} + +define <1 x i32> @vp_splat_v1i32(i32 %val, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <1 x i32> @llvm.experimental.vp.splat.v1i32(i32 %val, <1 x i1> %m, i32 %evl) + ret <1 x i32> %splat +} + +define <2 x i32> @vp_splat_v2i32(i32 %val, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 %val, <2 x i1> %m, i32 %evl) + ret <2 x i32> %splat +} + +define <4 x i32> @vp_splat_v4i32(i32 %val, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 %val, <4 x i1> %m, i32 %evl) + ret <4 x i32> %splat +} + +define <8 x i32> @vp_splat_v8i32(i32 %val, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 %val, <8 x i1> %m, i32 %evl) + ret <8 x i32> %splat +} + +define <16 x i32> @vp_splat_v16i32(i32 %val, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 %val, <16 x i1> %m, i32 %evl) + ret <16 x i32> %splat +} + +define <1 x i64> @vp_splat_v1i64(i64 %val, <1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_v1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_v1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call <1 x i64> @llvm.experimental.vp.splat.v1i64(i64 %val, <1 x i1> %m, i32 %evl) + ret <1 x i64> %splat +} + +define <2 x i64> @vp_splat_v2i64(i64 %val, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 %val, <2 x i1> %m, i32 %evl) + ret <2 x i64> %splat +} + +define <4 x i64> @vp_splat_v4i64(i64 %val, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 %val, <4 x i1> %m, i32 %evl) + ret <4 x i64> %splat +} + +define <8 x i64> @vp_splat_v8i64(i64 %val, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 %val, <8 x i1> %m, i32 %evl) + ret <8 x i64> %splat +} + +define <1 x half> @vp_splat_v1f16(half %val, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <1 x half> @llvm.experimental.vp.splat.v1f16(half %val, <1 x i1> %m, i32 %evl) + ret <1 x half> %splat +} + +define <2 x half> @vp_splat_v2f16(half %val, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <2 x half> @llvm.experimental.vp.splat.v2f16(half %val, <2 x i1> %m, i32 %evl) + ret <2 x half> %splat +} + +define <4 x half> @vp_splat_v4f16(half %val, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <4 x half> @llvm.experimental.vp.splat.v4f16(half %val, <4 x i1> %m, i32 %evl) + ret <4 x half> %splat +} + +define <8 x half> @vp_splat_v8f16(half %val, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <8 x half> @llvm.experimental.vp.splat.v8f16(half %val, <8 x i1> %m, i32 %evl) + ret <8 x half> %splat +} + +define <16 x half> @vp_splat_v16f16(half %val, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <16 x half> 
@llvm.experimental.vp.splat.v16f16(half %val, <16 x i1> %m, i32 %evl) + ret <16 x half> %splat +} + +define <32 x half> @vp_splat_v32f16(half %val, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <32 x half> @llvm.experimental.vp.splat.v32f16(half %val, <32 x i1> %m, i32 %evl) + ret <32 x half> %splat +} + +define <1 x float> @vp_splat_v1f32(float %val, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <1 x float> @llvm.experimental.vp.splat.v1f32(float %val, <1 x i1> %m, i32 %evl) + ret <1 x float> %splat +} + +define <2 x float> @vp_splat_v2f32(float %val, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <2 x float> @llvm.experimental.vp.splat.v2f32(float %val, <2 x i1> %m, i32 %evl) + ret <2 x float> %splat +} + +define <4 x float> @vp_splat_v4f32(float %val, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %val, <4 x i1> %m, i32 %evl) + ret <4 x float> %splat +} + +define <8 x float> @vp_splat_v8f32(float %val, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <8 x float> @llvm.experimental.vp.splat.v8f32(float %val, <8 x i1> %m, i32 %evl) + ret <8 x float> %splat +} + +define <16 x float> @vp_splat_v16f32(float %val, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <16 x float> @llvm.experimental.vp.splat.v16f32(float %val, <16 x i1> %m, i32 %evl) + ret <16 x float> %splat +} + +define <1 x double> @vp_splat_v1f64(double %val, <1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <1 x double> @llvm.experimental.vp.splat.v1f64(double %val, <1 x i1> %m, i32 %evl) + ret <1 x double> %splat +} + +define <2 x double> @vp_splat_v2f64(double %val, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <2 x double> @llvm.experimental.vp.splat.v2f64(double %val, <2 x i1> %m, i32 %evl) + ret <2 x double> %splat +} + +define <4 x double> @vp_splat_v4f64(double %val, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <4 x double> @llvm.experimental.vp.splat.v4f64(double %val, <4 x i1> %m, i32 %evl) + ret <4 x double> %splat +} + +define <8 x double> @vp_splat_v8f64(double %val, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; 
CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call <8 x double> @llvm.experimental.vp.splat.v8f64(double %val, <8 x i1> %m, i32 %evl) + ret <8 x double> %splat +} + +define <16 x i31> @vp_splat_v16i31(i31 %val, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v16i31: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <16 x i31> @llvm.experimental.vp.splat.v16i31(i31 %val, <16 x i1> %m, i32 %evl) + ret <16 x i31> %splat +} + +define <15 x i32> @vp_splat_v15i32(i32 %val, <15 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v15i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <15 x i32> @llvm.experimental.vp.splat.v15i32(i32 %val, <15 x i1> %m, i32 %evl) + ret <15 x i32> %splat +} + +; Split case. +define <32 x i32> @vp_splat_v32i32(i32 %val, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_v32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call <32 x i32> @llvm.experimental.vp.splat.v32i32(i32 %val, <32 x i1> %m, i32 %evl) + ret <32 x i32> %splat +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll new file mode 100644 index 0000000000000..5fbdefda9f402 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll @@ -0,0 +1,464 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define @vp_splat_nxv1i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = 
call @llvm.experimental.vp.splat.nxv32i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv64i8(i8 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv64i8(i8 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1i16(i16 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1i16(i16 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2i16(i16 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2i16(i16 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4i16(i16 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4i16(i16 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8i16(i16 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8i16(i16 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16i16(i16 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16i16(i16 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32i16(i16 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv32i16(i16 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1i32(i32 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2i32(i32 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4i32(i32 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8i32(i32 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16i32: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16i32(i32 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1i64(i64 %val, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1i64(i64 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2i64(i64 %val, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2i64(i64 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4i64(i64 %val, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4i64(i64 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8i64(i64 %val, %m, i32 zeroext %evl) { +; RV32-LABEL: vp_splat_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a0), zero +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_splat_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8i64(i64 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1f16(half %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2f16(half %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4f16(half %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; 
CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8f16(half %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16f16(half %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv32f16(half %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv32f16(half %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1f32(float %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1f32(float %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2f32(float %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2f32(float %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4f32(float %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4f32(float %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv8f32(float %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8f32(float %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16f32(float %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16f32(float %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv1f64(double %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv1f64(double %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv2f64(double %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv2f64(double %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv4f64(double %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv4f64(double %val, %m, i32 
%evl) + ret %splat +} + +define @vp_splat_nxv8f64(double %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv8f64(double %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv16i31(i31 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv16i31: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv16i31(i31 %val, %m, i32 %evl) + ret %splat +} + +define @vp_splat_nxv15i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv15i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv15i32(i32 %val, %m, i32 %evl) + ret %splat +} + +; Split case. +define @vp_splat_nxv32i32(i32 %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vp_splat_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub a3, a1, a2 +; CHECK-NEXT: sltu a4, a1, a3 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: bltu a1, a2, .LBB39_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB39_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + %splat = call @llvm.experimental.vp.splat.nxv32i32(i32 %val, %m, i32 %evl) + ret %splat +} diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp index d6508abd5197e..eab2850ca4e1e 100644 --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -108,6 +108,8 @@ class VPIntrinsicTest : public testing::Test { "addrspace(1)*, i32, <8 x i1>, i32) "; Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x " "i1>, i32) "; + Str << " declare <8 x i32> @llvm.experimental.vp.splat.v8i32(i32, <8 x " + "i1>, i32) "; for (const char *ReductionOpcode : ReductionIntOpcodes) Str << " declare i32 @llvm.vp.reduce." << ReductionOpcode