From 40269c50b4b81bb075ee20dd8f887564c75f864e Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 2 Dec 2024 18:51:41 +0800
Subject: [PATCH] [RISCV] Clear vill for whole vector register moves in
 vsetvli insertion

This is an alternative to #117866 that works by demanding a valid vtype
instead of using a separate pass. The main advantage of this is that it
allows coalesceVSETVLIs to just reuse an existing vsetvli later in the
block.

To do this we need to first transfer the vsetvli info to some arbitrary
valid state in transferBefore when we encounter a vector copy. Then we
add a new vill demanded field that will happily accept any other known
vtype, which allows us to coalesce these where possible.

Note we also need to check for vector copies in computeVLVTYPEChanges,
otherwise the pass will completely skip over functions that only have
vector copies and nothing else.

This is one part of a fix for #114518. We still need to check whether
there are other cases where vector copies/whole register moves are
inserted after vsetvli insertion.
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 61 +-
 .../CodeGen/RISCV/inline-asm-v-constraint.ll | 2 +
 llvm/test/CodeGen/RISCV/rvv/abs-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 4 +-
 .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 4 +
 llvm/test/CodeGen/RISCV/rvv/calling-conv.ll | 4 +
 llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/compressstore.ll | 4 +-
 .../RISCV/rvv/constant-folding-crash.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 10 +-
 llvm/test/CodeGen/RISCV/rvv/expandload.ll | 551 ++++-
 .../CodeGen/RISCV/rvv/extract-subvector.ll | 19 +
 .../rvv/fixed-vector-i8-index-cornercase.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 2 +
 .../rvv/fixed-vectors-calling-conv-fastcc.ll | 1 +
 .../RISCV/rvv/fixed-vectors-calling-conv.ll | 1 +
 .../RISCV/rvv/fixed-vectors-ceil-vp.ll | 13 +-
 .../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-floor-vp.ll | 13 +-
 .../RISCV/rvv/fixed-vectors-fmaximum-vp.ll | 22 +-
 .../RISCV/rvv/fixed-vectors-fminimum-vp.ll | 22 +-
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-fptrunc-vp.ll | 2 +-
 .../RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll | 2 +-
 .../rvv/fixed-vectors-insert-subvector.ll | 2 +
 .../RISCV/rvv/fixed-vectors-int-interleave.ll | 4 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll | 2057 ++++++++---------
 .../rvv/fixed-vectors-masked-load-int.ll | 1 +
 .../RISCV/rvv/fixed-vectors-nearbyint-vp.ll | 9 +-
 .../rvv/fixed-vectors-reduction-mask-vp.ll | 30 +
 .../RISCV/rvv/fixed-vectors-rint-vp.ll | 9 +-
 .../RISCV/rvv/fixed-vectors-round-vp.ll | 13 +-
 .../RISCV/rvv/fixed-vectors-roundeven-vp.ll | 13 +-
 .../RISCV/rvv/fixed-vectors-roundtozero-vp.ll | 13 +-
 .../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 3 +
 .../RISCV/rvv/fixed-vectors-shuffle-concat.ll | 17 +-
 .../rvv/fixed-vectors-shuffle-exact-vlen.ll | 2 +
 .../rvv/fixed-vectors-shuffle-reverse.ll | 22 +-
 .../rvv/fixed-vectors-shuffle-vslide1up.ll | 2 +-
 .../fixed-vectors-strided-load-store-asm.ll | 1 +
 .../RISCV/rvv/fixed-vectors-strided-vpload.ll | 3 +
 .../RISCV/rvv/fixed-vectors-trunc-vp.ll | 6 +-
 .../RISCV/rvv/fixed-vectors-unaligned.ll | 68 +-
 .../RISCV/rvv/fixed-vectors-vadd-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vmax-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vmaxu-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vmin-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vminu-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vpgather.ll | 2 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vpmerge.ll | 2 +-
 .../RISCV/rvv/fixed-vectors-vsadd-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vsaddu-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vssub-vp.ll | 1 +
 .../RISCV/rvv/fixed-vectors-vssubu-vp.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 28 +-
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 36 +-
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 36 +-
 .../RISCV/rvv/fold-scalar-load-crash.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 7 +-
 llvm/test/CodeGen/RISCV/rvv/inline-asm.ll | 7 +
 .../CodeGen/RISCV/rvv/insert-subvector.ll | 22 +
 llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/masked-tama.ll | 3 +
 llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 18 +-
 .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 2 +-
 .../RISCV/rvv/named-vector-shuffle-reverse.ll | 26 +-
 llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/pr88576.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/rint-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 28 +-
 llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 28 +-
 .../RISCV/rvv/rv32-spill-vector-csr.ll | 1 +
 .../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 5 +
 .../RISCV/rvv/rv64-spill-vector-csr.ll | 1 +
 .../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 5 +
 .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 1 +
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 12 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 4 +
 .../test/CodeGen/RISCV/rvv/strided-vpstore.ll | 1 +
 .../RISCV/rvv/undef-earlyclobber-chain.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vcpop.ll | 7 +
 .../RISCV/rvv/vector-deinterleave-fixed.ll | 2 +-
 .../CodeGen/RISCV/rvv/vector-deinterleave.ll | 16 +-
 .../RISCV/rvv/vector-interleave-fixed.ll | 8 +-
 .../RISCV/rvv/vector-interleave-store.ll | 2 +-
 .../CodeGen/RISCV/rvv/vector-interleave.ll | 30 +-
 .../RISCV/rvv/vector-reassociations.ll | 4 +
 llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 24 +-
 llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 12 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 12 +-
 llvm/test/CodeGen/RISCV/rvv/vfirst.ll | 7 +
 llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 25 +-
 .../RISCV/rvv/vfmadd-constrained-sdnode.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll | 12 +-
 llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 2 +-
 .../RISCV/rvv/vfnmadd-constrained-sdnode.ll | 2 +-
 .../RISCV/rvv/vfnmsub-constrained-sdnode.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 12 +-
 llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 3 +-
 .../CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll | 165 ++
 .../CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll | 165 ++
 llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vmfeq.ll | 24 +
 llvm/test/CodeGen/RISCV/rvv/vmfge.ll | 24 +
 llvm/test/CodeGen/RISCV/rvv/vmfgt.ll | 24 +
 llvm/test/CodeGen/RISCV/rvv/vmfle.ll | 24 +
 llvm/test/CodeGen/RISCV/rvv/vmflt.ll | 24 +
 llvm/test/CodeGen/RISCV/rvv/vmfne.ll | 24 +
 llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vmsbf.ll | 7 +
 llvm/test/CodeGen/RISCV/rvv/vmseq.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsge.ll | 55 +
 llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsgt.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsif.ll | 7 +
 llvm/test/CodeGen/RISCV/rvv/vmsle.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsleu.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmslt.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsltu.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsne.ll | 54 +
 llvm/test/CodeGen/RISCV/rvv/vmsof.ll | 7 +
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll | 4 +
 llvm/test/CodeGen/RISCV/rvv/vp-select.ll | 1 +
 .../RISCV/rvv/vp-splice-mask-fixed-vectors.ll | 12 +
 .../RISCV/rvv/vp-splice-mask-vectors.ll | 21 +
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 20 +-
 llvm/test/CodeGen/RISCV/rvv/vpload.ll | 3 +-
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 1 +
 .../CodeGen/RISCV/rvv/vreductions-mask-vp.ll | 37 +
 .../RISCV/rvv/vrgatherei16-subreg-liveness.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/vselect-int.ll | 1 +
 llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 11 +-
 .../CodeGen/RISCV/rvv/vsetvli-insert-O0.ll | 10 +-
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll | 2 +-
 llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 4 +-
 llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 8 +-
 llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll | 6 +-
 llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll | 2 +-
 175 files changed, 3412 insertions(+), 1543 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 052b4a61298223..20b4a40ecaa8f3 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -195,6 +195,27 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
   return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
 }
 
+/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
+static bool isVecCopy(const MachineInstr &MI) {
+  static const TargetRegisterClass *RVVRegClasses[] = {
+      &RISCV::VRRegClass,     &RISCV::VRM2RegClass,   &RISCV::VRM4RegClass,
+      &RISCV::VRM8RegClass,   &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
+      &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
+      &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
+      &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
+  if (!MI.isCopy())
+    return false;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  for (const auto &RegClass : RVVRegClasses) {
+    if (RegClass->contains(DstReg, SrcReg)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 /// Which subfields of VL or VTYPE have values we need to preserve?
 struct DemandedFields {
   // Some unknown property of VL is used. If demanded, must preserve entire
@@ -221,10 +242,13 @@ struct DemandedFields {
   bool SEWLMULRatio = false;
   bool TailPolicy = false;
   bool MaskPolicy = false;
+  // If this is true, we demand that VTYPE is set to some legal state, i.e. that
+  // vill is unset.
+  bool VILL = false;
 
   // Return true if any part of VTYPE was used
   bool usedVTYPE() const {
-    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
+    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
   }
 
   // Return true if any property of VL was used
@@ -239,6 +263,7 @@ struct DemandedFields {
     SEWLMULRatio = true;
     TailPolicy = true;
     MaskPolicy = true;
+    VILL = true;
   }
 
   // Mark all VL properties as demanded
@@ -263,6 +288,7 @@ struct DemandedFields {
     SEWLMULRatio |= B.SEWLMULRatio;
     TailPolicy |= B.TailPolicy;
     MaskPolicy |= B.MaskPolicy;
+    VILL |= B.VILL;
   }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -308,7 +334,8 @@ struct DemandedFields {
       OS << ", ";
     OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
     OS << "TailPolicy=" << TailPolicy << ", ";
-    OS << "MaskPolicy=" << MaskPolicy;
+    OS << "MaskPolicy=" << MaskPolicy << ", ";
+    OS << "VILL=" << VILL;
     OS << "}";
   }
 #endif
@@ -503,6 +530,16 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
     }
   }
 
+  // In §32.16.6, whole vector register moves have a dependency on SEW. At the
+  // MIR level though we don't encode the element type, and it gives the same
+  // result whatever the SEW may be.
+  //
+  // However it does need valid SEW, i.e. vill must be cleared. The entry to a
+  // function, calls and inline assembly may all set it, so make sure we clear
+  // it for whole register copies.
+  if (isVecCopy(MI))
+    Res.VILL = true;
+
   return Res;
 }
 
@@ -1208,6 +1245,17 @@ static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
 // legal for MI, but may not be the state requested by MI.
 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                         const MachineInstr &MI) const {
+  if (isVecCopy(MI) &&
+      (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
+    // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
+    // be coalesced into another vsetvli since we won't demand any fields.
+ VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly + NewInfo.setAVLImm(0); + NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, 8, true, true); + Info = NewInfo; + return; + } + if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags)) return; @@ -1296,7 +1344,8 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB, for (const MachineInstr &MI : MBB) { transferBefore(Info, MI); - if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags)) + if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) || + isVecCopy(MI)) HadVectorOp = true; transferAfter(Info, MI); @@ -1426,6 +1475,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { PrefixTransparent = false; } + if (isVecCopy(MI) && + !PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) { + insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo); + PrefixTransparent = false; + } + uint64_t TSFlags = MI.getDesc().TSFlags; if (RISCVII::hasSEWOp(TSFlags)) { if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) { diff --git a/llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll b/llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll index c04e4fea7b2c29..45bc1222c0677c 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-v-constraint.ll @@ -45,6 +45,7 @@ define @constraint_vd( %0, define @constraint_vm( %0, %1) nounwind { ; RV32I-LABEL: constraint_vm: ; RV32I: # %bb.0: +; RV32I-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32I-NEXT: vmv1r.v v9, v0 ; RV32I-NEXT: vmv1r.v v0, v8 ; RV32I-NEXT: #APP @@ -54,6 +55,7 @@ define @constraint_vm( %0, ; ; RV64I-LABEL: constraint_vm: ; RV64I: # %bb.0: +; RV64I-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64I-NEXT: vmv1r.v v9, v0 ; RV64I-NEXT: vmv1r.v v0, v8 ; RV64I-NEXT: #APP diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll index 163d9145bc3623..ee0016ec080e24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -567,6 +567,7 @@ define @vp_abs_nxv16i64( %va, @vp_abs_nxv16i64( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_bitreverse_nxv64i16( %va, @vp_bswap_nxv64i16( %va, @vp_bswap_nxv64i16( %va, @vp_bswap_nxv64i16( %va, @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv8r.v v16, v0 ; RV32-NEXT: call ext3 ; RV32-NEXT: addi sp, s0, -144 @@ -523,6 +526,7 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv8r.v v16, v0 ; RV64-NEXT: call ext3 ; RV64-NEXT: addi sp, s0, -144 diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 9b27116fef7cae..8d56a76dc8eb80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -103,6 +103,7 @@ define target("riscv.vector.tuple", , 2) @caller_tuple_return( ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: call callee_tuple_return +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv2r.v v6, v8 ; RV32-NEXT: vmv2r.v v8, v10 ; RV32-NEXT: 
vmv2r.v v10, v6 @@ -119,6 +120,7 @@ define target("riscv.vector.tuple", , 2) @caller_tuple_return( ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: call callee_tuple_return +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv2r.v v6, v8 ; RV64-NEXT: vmv2r.v v8, v10 ; RV64-NEXT: vmv2r.v v10, v6 @@ -144,6 +146,7 @@ define void @caller_tuple_argument(target("riscv.vector.tuple", @llvm.vp.ceil.nxv4bf16(, @vp_ceil_vv_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -169,8 +169,8 @@ declare @llvm.vp.ceil.nxv8bf16(, @vp_ceil_vv_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -221,8 +221,8 @@ declare @llvm.vp.ceil.nxv16bf16(, < define @vp_ceil_vv_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -279,9 +279,9 @@ define @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 @@ -582,8 +582,8 @@ define @vp_ceil_vv_nxv4f16( %va, @llvm.vp.ceil.nxv8f16(, @vp_ceil_vv_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) @@ -668,8 +669,8 @@ define @vp_ceil_vv_nxv8f16( %va, @llvm.vp.ceil.nxv16f16(, @vp_ceil_vv_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) @@ -754,8 +756,8 @@ define @vp_ceil_vv_nxv16f16( %va, @llvm.vp.ceil.nxv32f16(, @vp_ceil_vv_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) @@ -846,9 +849,9 @@ define @vp_ceil_vv_nxv32f16( %va, @llvm.vp.ceil.nxv4f32(, @vp_ceil_vv_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -1112,6 +1116,7 @@ declare @llvm.vp.ceil.nxv8f32(, @vp_ceil_vv_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f32: 
; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -1156,6 +1161,7 @@ declare @llvm.vp.ceil.nxv16f32(, @vp_ceil_vv_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1242,6 +1248,7 @@ declare @llvm.vp.ceil.nxv2f64(, @vp_ceil_vv_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI36_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) @@ -1286,6 +1293,7 @@ declare @llvm.vp.ceil.nxv4f64(, @vp_ceil_vv_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI38_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) @@ -1330,6 +1338,7 @@ declare @llvm.vp.ceil.nxv7f64(, @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI40_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) @@ -1374,6 +1383,7 @@ declare @llvm.vp.ceil.nxv8f64(, @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI42_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) @@ -1425,13 +1435,13 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll index bfb2d0a3accc44..419a50cec825a3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/compressstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/compressstore.ll @@ -197,9 +197,9 @@ entry: define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data) { ; RV64-LABEL: test_compresstore_v256i8: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vmv1r.v v7, v8 ; RV64-NEXT: li a2, 128 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vi v9, v0, 1 ; RV64-NEXT: vmv.x.s a3, v0 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma @@ -230,9 +230,9 @@ define void @test_compresstore_v256i8(ptr %p, <256 x i1> %mask, <256 x i8> %data ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv8r.v v24, v16 ; RV32-NEXT: li a2, 128 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: 
vslidedown.vi v9, v0, 1 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vmv.x.s a4, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll index ad176df71397e6..f6c26bbba89fe5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll @@ -18,11 +18,11 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lanes.b, <4 x i1> %sel) { ; RV32-LABEL: constant_folding_crash: ; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: lw a0, 8(a0) ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vmsne.vi v0, v11, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -43,11 +43,11 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; ; RV64-LABEL: constant_folding_crash: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: ld a0, 8(a0) ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: seqz a0, a0 -; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vmv.v.x v13, a0 ; RV64-NEXT: vmsne.vi v0, v13, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index f56a792fdef6a8..ce4bc48dff0426 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -1235,13 +1235,13 @@ declare @llvm.vp.ctlz.nxv16i64(, i1 immar define @vp_ctlz_nxv16i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv16i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: fsrmi a4, 1 ; CHECK-NEXT: li a2, 52 ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: sub a5, a0, a1 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: sltu a3, a0, a5 ; CHECK-NEXT: addi a3, a3, -1 @@ -1270,11 +1270,11 @@ define @vp_ctlz_nxv16i64( %va, @vp_ctlz_zero_undef_nxv8i64_unmasked( @vp_ctlz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: fsrmi a3, 1 ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a4 ; CHECK-NEXT: addi a2, a2, -1 @@ -2497,11 +2497,11 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64: ; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 3 ; CHECK-ZVBB-NEXT: sub a3, a0, a1 -; CHECK-ZVBB-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-ZVBB-NEXT: sltu a2, a0, a3 ; CHECK-ZVBB-NEXT: addi a2, a2, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 9e75dc9dccffde..52ddd9ab2f8329 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2022,6 +2022,7 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, 
@vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -4012,7 +4013,6 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: fsrmi a3, 1 ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a4, a0, a1 -; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a4 ; CHECK-NEXT: addi a2, a2, -1 @@ -4057,11 +4057,11 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64: ; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 3 ; CHECK-ZVBB-NEXT: sub a3, a0, a1 -; CHECK-ZVBB-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-ZVBB-NEXT: sltu a2, a0, a3 ; CHECK-ZVBB-NEXT: addi a2, a2, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll index b32d85bb1943a5..13a7e444b77f3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll @@ -227,9 +227,9 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV32-NEXT: add a2, sp, a2 ; CHECK-RV32-NEXT: addi a2, a2, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v7, v8 ; CHECK-RV32-NEXT: li a2, 128 -; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v9, v0, 1 ; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: vmv.x.s a4, v0 @@ -338,9 +338,9 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: add a2, sp, a2 ; CHECK-RV64-NEXT: addi a2, a2, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v7, v8 ; CHECK-RV64-NEXT: li a2, 128 -; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v9, v0, 1 ; CHECK-RV64-NEXT: vmv.x.s a3, v0 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma @@ -1626,8 +1626,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: bgez a1, .LBB61_30 ; CHECK-RV32-NEXT: .LBB61_29: # %cond.load109 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 29, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 28 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -1639,8 +1639,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: bgez a2, .LBB61_32 ; CHECK-RV32-NEXT: # %bb.31: # %cond.load113 ; CHECK-RV32-NEXT: lbu a2, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 30, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a2 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 29 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -1787,6 +1787,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: 
.LBB61_65: # %cond.load241 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -1940,6 +1941,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_99: # %cond.load369 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -2093,6 +2095,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_133: # %cond.load497 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -2246,6 +2249,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_167: # %cond.load625 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -2399,6 +2403,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_201: # %cond.load753 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -2552,6 +2557,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_235: # %cond.load881 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -2705,6 +2711,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_269: # %cond.load1009 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -3907,10 +3914,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_2 ; CHECK-RV32-NEXT: .LBB61_545: # %cond.load1 ; CHECK-RV32-NEXT: lbu a1, 0(a0) +; CHECK-RV32-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 -; CHECK-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a1 -; CHECK-RV32-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 1 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 @@ -3920,8 +3926,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_3 ; CHECK-RV32-NEXT: .LBB61_546: # %cond.load5 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 3, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 2 
; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -3932,8 +3938,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_4 ; CHECK-RV32-NEXT: .LBB61_547: # %cond.load9 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 3 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -3944,8 +3950,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_5 ; CHECK-RV32-NEXT: .LBB61_548: # %cond.load13 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 5, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 4 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -3956,8 +3962,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_6 ; CHECK-RV32-NEXT: .LBB61_549: # %cond.load17 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 6, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 5 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -3968,8 +3974,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_7 ; CHECK-RV32-NEXT: .LBB61_550: # %cond.load21 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 7, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 6 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -3980,8 +3986,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_8 ; CHECK-RV32-NEXT: .LBB61_551: # %cond.load25 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 7 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -3992,8 +3998,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_9 ; CHECK-RV32-NEXT: .LBB61_552: # %cond.load29 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 9, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 8 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4004,8 +4010,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_10 ; CHECK-RV32-NEXT: .LBB61_553: # %cond.load33 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 10, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 9 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4016,8 +4022,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_11 ; CHECK-RV32-NEXT: .LBB61_554: # %cond.load37 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 11, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi 
v8, v9, 10 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4028,8 +4034,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_12 ; CHECK-RV32-NEXT: .LBB61_555: # %cond.load41 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 12, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 11 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4040,8 +4046,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_13 ; CHECK-RV32-NEXT: .LBB61_556: # %cond.load45 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 13, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 12 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4052,8 +4058,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_14 ; CHECK-RV32-NEXT: .LBB61_557: # %cond.load49 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 14, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 13 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4064,8 +4070,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_15 ; CHECK-RV32-NEXT: .LBB61_558: # %cond.load53 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 15, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 14 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4076,8 +4082,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_16 ; CHECK-RV32-NEXT: .LBB61_559: # %cond.load57 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 15 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4088,8 +4094,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_17 ; CHECK-RV32-NEXT: .LBB61_560: # %cond.load61 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 17, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 16 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4100,8 +4106,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_18 ; CHECK-RV32-NEXT: .LBB61_561: # %cond.load65 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 18, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 17 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4112,8 +4118,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_19 ; CHECK-RV32-NEXT: .LBB61_562: # %cond.load69 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 19, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; 
CHECK-RV32-NEXT: vslideup.vi v8, v9, 18 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4124,8 +4130,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_20 ; CHECK-RV32-NEXT: .LBB61_563: # %cond.load73 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 20, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 19 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4136,8 +4142,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_21 ; CHECK-RV32-NEXT: .LBB61_564: # %cond.load77 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 21, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 20 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4148,8 +4154,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_22 ; CHECK-RV32-NEXT: .LBB61_565: # %cond.load81 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 22, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 21 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4160,8 +4166,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_23 ; CHECK-RV32-NEXT: .LBB61_566: # %cond.load85 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 23, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 22 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4172,8 +4178,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_24 ; CHECK-RV32-NEXT: .LBB61_567: # %cond.load89 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 24, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 23 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4184,8 +4190,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_25 ; CHECK-RV32-NEXT: .LBB61_568: # %cond.load93 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 25, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 24 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4196,8 +4202,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_26 ; CHECK-RV32-NEXT: .LBB61_569: # %cond.load97 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 26, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 25 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4208,8 +4214,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_27 ; CHECK-RV32-NEXT: .LBB61_570: # %cond.load101 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 27, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; 
CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 26 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4220,8 +4226,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: j .LBB61_28 ; CHECK-RV32-NEXT: .LBB61_571: # %cond.load105 ; CHECK-RV32-NEXT: lbu a1, 0(a0) -; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetivli zero, 28, e8, m1, tu, ma +; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 27 ; CHECK-RV32-NEXT: addi a0, a0, 1 @@ -4248,6 +4254,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_573: # %cond.load125 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4264,6 +4271,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_574: # %cond.load129 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4280,6 +4288,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_575: # %cond.load133 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4296,6 +4305,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_576: # %cond.load137 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4312,6 +4322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_577: # %cond.load141 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4328,6 +4339,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_578: # %cond.load145 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4344,6 +4356,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_579: # %cond.load149 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4360,6 +4373,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_580: # %cond.load153 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: 
vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4376,6 +4390,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_581: # %cond.load157 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4392,6 +4407,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_582: # %cond.load161 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4408,6 +4424,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_583: # %cond.load165 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4424,6 +4441,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_584: # %cond.load169 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4440,6 +4458,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_585: # %cond.load173 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4456,6 +4475,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_586: # %cond.load177 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4472,6 +4492,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_587: # %cond.load181 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4488,6 +4509,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_588: # %cond.load185 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4504,6 +4526,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_589: # %cond.load189 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; 
CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4520,6 +4543,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_590: # %cond.load193 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4536,6 +4560,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_591: # %cond.load197 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4552,6 +4577,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_592: # %cond.load201 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4568,6 +4594,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_593: # %cond.load205 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4584,6 +4611,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_594: # %cond.load209 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4600,6 +4628,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_595: # %cond.load213 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4616,6 +4645,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_596: # %cond.load217 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4632,6 +4662,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_597: # %cond.load221 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4648,6 +4679,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_598: # %cond.load225 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4664,6 +4696,7 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_599: # %cond.load229 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4680,6 +4713,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_600: # %cond.load233 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4696,6 +4730,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_601: # %cond.load237 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a3 @@ -4728,6 +4763,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_603: # %cond.load253 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4744,6 +4780,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_604: # %cond.load257 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4760,6 +4797,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_605: # %cond.load261 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4776,6 +4814,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_606: # %cond.load265 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4792,6 +4831,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_607: # %cond.load269 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4808,6 +4848,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_608: # %cond.load273 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4824,6 +4865,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; 
CHECK-RV32-NEXT: .LBB61_609: # %cond.load277 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4840,6 +4882,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_610: # %cond.load281 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4856,6 +4899,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_611: # %cond.load285 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4872,6 +4916,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_612: # %cond.load289 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4888,6 +4933,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_613: # %cond.load293 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4904,6 +4950,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_614: # %cond.load297 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4920,6 +4967,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_615: # %cond.load301 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4936,6 +4984,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_616: # %cond.load305 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4952,6 +5001,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_617: # %cond.load309 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4968,6 +5018,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_618: # %cond.load313 ; 
CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -4984,6 +5035,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_619: # %cond.load317 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5000,6 +5052,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_620: # %cond.load321 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5016,6 +5069,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_621: # %cond.load325 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5032,6 +5086,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_622: # %cond.load329 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5048,6 +5103,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_623: # %cond.load333 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5064,6 +5120,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_624: # %cond.load337 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5080,6 +5137,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_625: # %cond.load341 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5096,6 +5154,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_626: # %cond.load345 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5112,6 +5171,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_627: # %cond.load349 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 
+; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5128,6 +5188,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_628: # %cond.load353 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5144,6 +5205,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_629: # %cond.load357 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5160,6 +5222,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_630: # %cond.load361 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5176,6 +5239,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_631: # %cond.load365 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a2 @@ -5208,6 +5272,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_633: # %cond.load381 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5224,6 +5289,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_634: # %cond.load385 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5240,6 +5306,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_635: # %cond.load389 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5256,6 +5323,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_636: # %cond.load393 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5272,6 +5340,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_637: # %cond.load397 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; 
CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5288,6 +5357,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_638: # %cond.load401 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5304,6 +5374,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_639: # %cond.load405 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5320,6 +5391,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_640: # %cond.load409 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5336,6 +5408,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_641: # %cond.load413 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5352,6 +5425,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_642: # %cond.load417 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5368,6 +5442,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_643: # %cond.load421 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5384,6 +5459,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_644: # %cond.load425 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5400,6 +5476,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_645: # %cond.load429 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5416,6 +5493,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_646: # %cond.load433 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli 
zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5432,6 +5510,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_647: # %cond.load437 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5448,6 +5527,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_648: # %cond.load441 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5464,6 +5544,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_649: # %cond.load445 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5480,6 +5561,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_650: # %cond.load449 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5496,6 +5578,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_651: # %cond.load453 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5512,6 +5595,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_652: # %cond.load457 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5528,6 +5612,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_653: # %cond.load461 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5544,6 +5629,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_654: # %cond.load465 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5560,6 +5646,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_655: # %cond.load469 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ 
-5576,6 +5663,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_656: # %cond.load473 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5592,6 +5680,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_657: # %cond.load477 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5608,6 +5697,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_658: # %cond.load481 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5624,6 +5714,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_659: # %cond.load485 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5640,6 +5731,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_660: # %cond.load489 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5656,6 +5748,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_661: # %cond.load493 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a3 @@ -5688,6 +5781,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_663: # %cond.load509 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5704,6 +5798,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_664: # %cond.load513 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5720,6 +5815,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_665: # %cond.load517 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5736,6 +5832,7 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_666: # %cond.load521 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5752,6 +5849,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_667: # %cond.load525 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5768,6 +5866,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_668: # %cond.load529 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5784,6 +5883,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_669: # %cond.load533 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5800,6 +5900,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_670: # %cond.load537 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5816,6 +5917,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_671: # %cond.load541 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5832,6 +5934,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_672: # %cond.load545 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5848,6 +5951,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_673: # %cond.load549 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5864,6 +5968,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_674: # %cond.load553 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5880,6 +5985,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < 
; CHECK-RV32-NEXT: .LBB61_675: # %cond.load557 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5896,6 +6002,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_676: # %cond.load561 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5912,6 +6019,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_677: # %cond.load565 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5928,6 +6036,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_678: # %cond.load569 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5944,6 +6053,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_679: # %cond.load573 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5960,6 +6070,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_680: # %cond.load577 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5976,6 +6087,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_681: # %cond.load581 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -5992,6 +6104,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_682: # %cond.load585 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6008,6 +6121,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_683: # %cond.load589 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6024,6 +6138,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_684: # %cond.load593 ; 
CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6040,6 +6155,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_685: # %cond.load597 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6056,6 +6172,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_686: # %cond.load601 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6072,6 +6189,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_687: # %cond.load605 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6088,6 +6206,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_688: # %cond.load609 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6104,6 +6223,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_689: # %cond.load613 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6120,6 +6240,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_690: # %cond.load617 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6136,6 +6257,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_691: # %cond.load621 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6168,6 +6290,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_693: # %cond.load637 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6184,6 +6307,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_694: # %cond.load641 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 
+; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6200,6 +6324,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_695: # %cond.load645 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6216,6 +6341,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_696: # %cond.load649 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6232,6 +6358,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_697: # %cond.load653 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6248,6 +6375,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_698: # %cond.load657 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6264,6 +6392,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_699: # %cond.load661 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6280,6 +6409,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_700: # %cond.load665 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6296,6 +6426,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_701: # %cond.load669 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6312,6 +6443,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_702: # %cond.load673 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6328,6 +6460,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_703: # %cond.load677 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; 
CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6344,6 +6477,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_704: # %cond.load681 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6360,6 +6494,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_705: # %cond.load685 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6376,6 +6511,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_706: # %cond.load689 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6392,6 +6528,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_707: # %cond.load693 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6408,6 +6545,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_708: # %cond.load697 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6424,6 +6562,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_709: # %cond.load701 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6440,6 +6579,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_710: # %cond.load705 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6456,6 +6596,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_711: # %cond.load709 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6472,6 +6613,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_712: # %cond.load713 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli 
zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6488,6 +6630,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_713: # %cond.load717 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6504,6 +6647,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_714: # %cond.load721 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6520,6 +6664,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_715: # %cond.load725 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6536,6 +6681,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_716: # %cond.load729 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6552,6 +6698,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_717: # %cond.load733 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6568,6 +6715,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_718: # %cond.load737 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6584,6 +6732,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_719: # %cond.load741 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6600,6 +6749,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_720: # %cond.load745 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ -6616,6 +6766,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_721: # %cond.load749 ; CHECK-RV32-NEXT: lbu a3, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a3 @@ 
-6648,6 +6799,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_723: # %cond.load765 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6664,6 +6816,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_724: # %cond.load769 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6680,6 +6833,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_725: # %cond.load773 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6696,6 +6850,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_726: # %cond.load777 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6712,6 +6867,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_727: # %cond.load781 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6728,6 +6884,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_728: # %cond.load785 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6744,6 +6901,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_729: # %cond.load789 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6760,6 +6918,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_730: # %cond.load793 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6776,6 +6935,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_731: # %cond.load797 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6792,6 +6952,7 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_732: # %cond.load801 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6808,6 +6969,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_733: # %cond.load805 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6824,6 +6986,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_734: # %cond.load809 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6840,6 +7003,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_735: # %cond.load813 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6856,6 +7020,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_736: # %cond.load817 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6872,6 +7037,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_737: # %cond.load821 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6888,6 +7054,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_738: # %cond.load825 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6904,6 +7071,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_739: # %cond.load829 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6920,6 +7088,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_740: # %cond.load833 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6936,6 +7105,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < 
; CHECK-RV32-NEXT: .LBB61_741: # %cond.load837 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6952,6 +7122,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_742: # %cond.load841 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6968,6 +7139,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_743: # %cond.load845 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -6984,6 +7156,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_744: # %cond.load849 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -7000,6 +7173,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_745: # %cond.load853 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -7016,6 +7190,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_746: # %cond.load857 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -7032,6 +7207,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_747: # %cond.load861 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -7048,6 +7224,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_748: # %cond.load865 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -7064,6 +7241,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_749: # %cond.load869 ; CHECK-RV32-NEXT: lbu a2, 0(a0) ; CHECK-RV32-NEXT: li a4, 512 +; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv8r.v v24, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e8, m1, ta, ma ; CHECK-RV32-NEXT: vmv.s.x v12, a2 @@ -7080,6 +7258,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: .LBB61_750: # %cond.load873 ; 
CHECK-RV32-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v24, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a2
@@ -7096,6 +7275,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_751: # %cond.load877
 ; CHECK-RV32-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v24, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a2
@@ -7128,6 +7308,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_753: # %cond.load893
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7144,6 +7325,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_754: # %cond.load897
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7160,6 +7342,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_755: # %cond.load901
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7176,6 +7359,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_756: # %cond.load905
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7192,6 +7376,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_757: # %cond.load909
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7208,6 +7393,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_758: # %cond.load913
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7224,6 +7410,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_759: # %cond.load917
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7240,6 +7427,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_760: # %cond.load921
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7256,6 +7444,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_761: # %cond.load925
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7272,6 +7461,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_762: # %cond.load929
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7288,6 +7478,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_763: # %cond.load933
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7304,6 +7495,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_764: # %cond.load937
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7320,6 +7512,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_765: # %cond.load941
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7336,6 +7529,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_766: # %cond.load945
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7352,6 +7546,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_767: # %cond.load949
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7368,6 +7563,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_768: # %cond.load953
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7384,6 +7580,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_769: # %cond.load957
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7400,6 +7597,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_770: # %cond.load961
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7416,6 +7614,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_771: # %cond.load965
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7432,6 +7631,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_772: # %cond.load969
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7448,6 +7648,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_773: # %cond.load973
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7464,6 +7665,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_774: # %cond.load977
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7480,6 +7682,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_775: # %cond.load981
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7496,6 +7699,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_776: # %cond.load985
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7512,6 +7716,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_777: # %cond.load989
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7528,6 +7733,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_778: # %cond.load993
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7544,6 +7750,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_779: # %cond.load997
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7560,6 +7767,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_780: # %cond.load1001
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -7576,6 +7784,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV32-NEXT:  .LBB61_781: # %cond.load1005
 ; CHECK-RV32-NEXT:    lbu a3, 0(a0)
 ; CHECK-RV32-NEXT:    li a4, 512
+; CHECK-RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV32-NEXT:    vsetvli zero, a4, e8, m1, ta, ma
 ; CHECK-RV32-NEXT:    vmv.s.x v12, a3
@@ -10999,6 +11208,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_62: # %cond.load241
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -11280,6 +11490,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_128: # %cond.load497
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -11561,6 +11772,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_194: # %cond.load753
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -11842,6 +12054,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_260: # %cond.load1009
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a2
@@ -12968,10 +13181,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_2
 ; CHECK-RV64-NEXT:  .LBB61_528: # %cond.load1
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
+; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, m1, tu, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
-; CHECK-RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
-; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, m1, tu, ma
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
 ; CHECK-RV64-NEXT:    vmv1r.v v16, v8
@@ -12981,8 +13193,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_3
 ; CHECK-RV64-NEXT:  .LBB61_529: # %cond.load5
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 3, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 2
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -12993,8 +13205,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_4
 ; CHECK-RV64-NEXT:  .LBB61_530: # %cond.load9
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 4, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 3
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13005,8 +13217,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_5
 ; CHECK-RV64-NEXT:  .LBB61_531: # %cond.load13
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 5, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 4
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13017,8 +13229,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_6
 ; CHECK-RV64-NEXT:  .LBB61_532: # %cond.load17
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 6, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 5
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13029,8 +13241,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_7
 ; CHECK-RV64-NEXT:  .LBB61_533: # %cond.load21
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 7, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 6
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13041,8 +13253,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_8
 ; CHECK-RV64-NEXT:  .LBB61_534: # %cond.load25
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 8, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 7
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13053,8 +13265,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_9
 ; CHECK-RV64-NEXT:  .LBB61_535: # %cond.load29
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 9, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 8
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13065,8 +13277,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_10
 ; CHECK-RV64-NEXT:  .LBB61_536: # %cond.load33
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 10, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 9
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13077,8 +13289,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_11
 ; CHECK-RV64-NEXT:  .LBB61_537: # %cond.load37
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 11, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 10
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13089,8 +13301,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_12
 ; CHECK-RV64-NEXT:  .LBB61_538: # %cond.load41
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 12, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 11
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13101,8 +13313,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_13
 ; CHECK-RV64-NEXT:  .LBB61_539: # %cond.load45
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 13, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 12
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13113,8 +13325,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_14
 ; CHECK-RV64-NEXT:  .LBB61_540: # %cond.load49
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 14, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 13
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13125,8 +13337,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_15
 ; CHECK-RV64-NEXT:  .LBB61_541: # %cond.load53
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 15, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 14
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13137,8 +13349,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_16
 ; CHECK-RV64-NEXT:  .LBB61_542: # %cond.load57
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 16, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 15
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13149,8 +13361,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_17
 ; CHECK-RV64-NEXT:  .LBB61_543: # %cond.load61
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 17, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 16
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13161,8 +13373,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_18
 ; CHECK-RV64-NEXT:  .LBB61_544: # %cond.load65
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 18, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 17
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13173,8 +13385,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_19
 ; CHECK-RV64-NEXT:  .LBB61_545: # %cond.load69
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 19, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 18
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13185,8 +13397,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_20
 ; CHECK-RV64-NEXT:  .LBB61_546: # %cond.load73
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 20, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 19
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13197,8 +13409,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_21
 ; CHECK-RV64-NEXT:  .LBB61_547: # %cond.load77
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 21, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 20
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13209,8 +13421,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_22
 ; CHECK-RV64-NEXT:  .LBB61_548: # %cond.load81
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 22, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 21
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13221,8 +13433,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_23
 ; CHECK-RV64-NEXT:  .LBB61_549: # %cond.load85
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 23, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 22
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13233,8 +13445,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_24
 ; CHECK-RV64-NEXT:  .LBB61_550: # %cond.load89
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 24, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 23
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13245,8 +13457,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_25
 ; CHECK-RV64-NEXT:  .LBB61_551: # %cond.load93
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 25, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 24
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13257,8 +13469,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_26
 ; CHECK-RV64-NEXT:  .LBB61_552: # %cond.load97
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 26, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 25
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13269,8 +13481,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_27
 ; CHECK-RV64-NEXT:  .LBB61_553: # %cond.load101
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 27, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 26
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13281,8 +13493,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_28
 ; CHECK-RV64-NEXT:  .LBB61_554: # %cond.load105
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 28, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 27
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13293,8 +13505,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_29
 ; CHECK-RV64-NEXT:  .LBB61_555: # %cond.load109
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 29, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 28
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13305,8 +13517,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_30
 ; CHECK-RV64-NEXT:  .LBB61_556: # %cond.load113
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 30, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 29
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13317,8 +13529,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:    j .LBB61_31
 ; CHECK-RV64-NEXT:  .LBB61_557: # %cond.load117
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
-; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetivli zero, 31, e8, m1, tu, ma
+; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
 ; CHECK-RV64-NEXT:    vslideup.vi v8, v9, 30
 ; CHECK-RV64-NEXT:    addi a0, a0, 1
@@ -13330,6 +13542,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_558: # %cond.load121
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13345,6 +13558,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_559: # %cond.load125
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13361,6 +13575,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_560: # %cond.load129
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13377,6 +13592,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_561: # %cond.load133
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13393,6 +13609,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_562: # %cond.load137
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13409,6 +13626,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_563: # %cond.load141
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13425,6 +13643,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_564: # %cond.load145
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13441,6 +13660,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_565: # %cond.load149
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13457,6 +13677,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_566: # %cond.load153
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13473,6 +13694,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_567: # %cond.load157
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13489,6 +13711,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_568: # %cond.load161
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13505,6 +13728,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_569: # %cond.load165
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13521,6 +13745,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_570: # %cond.load169
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13537,6 +13762,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_571: # %cond.load173
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13553,6 +13779,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_572: # %cond.load177
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13569,6 +13796,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_573: # %cond.load181
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13585,6 +13813,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_574: # %cond.load185
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13601,6 +13830,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_575: # %cond.load189
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13617,6 +13847,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_576: # %cond.load193
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13633,6 +13864,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_577: # %cond.load197
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13649,6 +13881,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_578: # %cond.load201
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13665,6 +13898,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_579: # %cond.load205
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13681,6 +13915,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_580: # %cond.load209
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13697,6 +13932,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_581: # %cond.load213
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13713,6 +13949,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_582: # %cond.load217
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13729,6 +13966,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_583: # %cond.load221
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13745,6 +13983,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_584: # %cond.load225
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13761,6 +14000,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_585: # %cond.load229
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13777,6 +14017,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_586: # %cond.load233
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13793,6 +14034,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_587: # %cond.load237
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v9, a1
@@ -13825,6 +14067,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_589: # %cond.load253
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13841,6 +14084,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_590: # %cond.load257
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13857,6 +14101,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_591: # %cond.load261
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13873,6 +14118,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_592: # %cond.load265
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13889,6 +14135,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_593: # %cond.load269
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13905,6 +14152,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_594: # %cond.load273
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13921,6 +14169,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_595: # %cond.load277
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13937,6 +14186,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_596: # %cond.load281
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13953,6 +14203,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_597: # %cond.load285
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13969,6 +14220,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_598: # %cond.load289
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -13985,6 +14237,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_599: # %cond.load293
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14001,6 +14254,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_600: # %cond.load297
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14017,6 +14271,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_601: # %cond.load301
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14033,6 +14288,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_602: # %cond.load305
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14049,6 +14305,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_603: # %cond.load309
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14065,6 +14322,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_604: # %cond.load313
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14081,6 +14339,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_605: # %cond.load317
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14097,6 +14356,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_606: # %cond.load321
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14113,6 +14373,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_607: # %cond.load325
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14129,6 +14390,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_608: # %cond.load329
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14145,6 +14407,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_609: # %cond.load333
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14161,6 +14424,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_610: # %cond.load337
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14177,6 +14441,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_611: # %cond.load341
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14193,6 +14458,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_612: # %cond.load345
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14209,6 +14475,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_613: # %cond.load349
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14225,6 +14492,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_614: # %cond.load353
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14241,6 +14509,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_615: # %cond.load357
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14257,6 +14526,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_616: # %cond.load361
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14273,6 +14543,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_617: # %cond.load365
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14289,6 +14560,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_618: # %cond.load369
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14305,6 +14577,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_619: # %cond.load373
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14321,6 +14594,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_620: # %cond.load377
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14337,6 +14611,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_621: # %cond.load381
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14353,6 +14628,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_622: # %cond.load385
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14369,6 +14645,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_623: # %cond.load389
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14385,6 +14662,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_624: # %cond.load393
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14401,6 +14679,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_625: # %cond.load397
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14417,6 +14696,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_626: # %cond.load401
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14433,6 +14713,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_627: # %cond.load405
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14449,6 +14730,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_628: # %cond.load409
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14465,6 +14747,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_629: # %cond.load413
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14481,6 +14764,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_630: # %cond.load417
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14497,6 +14781,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_631: # %cond.load421
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14513,6 +14798,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_632: # %cond.load425
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14529,6 +14815,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_633: # %cond.load429
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14545,6 +14832,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_634: # %cond.load433
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14561,6 +14849,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_635: # %cond.load437
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14577,6 +14866,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_636: # %cond.load441
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14593,6 +14883,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_637: # %cond.load445
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14609,6 +14900,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_638: # %cond.load449
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14625,6 +14917,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_639: # %cond.load453
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14641,6 +14934,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_640: # %cond.load457
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14657,6 +14951,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_641: # %cond.load461
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14673,6 +14968,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_642: # %cond.load465
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14689,6 +14985,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_643: # %cond.load469
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14705,6 +15002,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_644: # %cond.load473
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14721,6 +15019,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_645: # %cond.load477
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14737,6 +15036,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_646: # %cond.load481
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14753,6 +15053,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_647: # %cond.load485
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14769,6 +15070,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_648: # %cond.load489
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14785,6 +15087,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_649: # %cond.load493
 ; CHECK-RV64-NEXT:    lbu a2, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v10, a2
@@ -14817,6 +15120,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_651: # %cond.load509
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14833,6 +15137,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_652: # %cond.load513
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14849,6 +15154,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_653: # %cond.load517
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14865,6 +15171,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_654: # %cond.load521
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14881,6 +15188,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_655: # %cond.load525
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14897,6 +15205,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_656: # %cond.load529
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14913,6 +15222,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_657: # %cond.load533
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14929,6 +15239,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_658: # %cond.load537
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14945,6 +15256,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_659: # %cond.load541
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14961,6 +15273,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_660: # %cond.load545
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14977,6 +15290,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_661: # %cond.load549
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -14993,6 +15307,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_662: # %cond.load553
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15009,6 +15324,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_663: # %cond.load557
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15025,6 +15341,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_664: # %cond.load561
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15041,6 +15358,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_665: # %cond.load565
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15057,6 +15375,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_666: # %cond.load569
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15073,6 +15392,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_667: # %cond.load573
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15089,6 +15409,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_668: # %cond.load577
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15105,6 +15426,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_669: # %cond.load581
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15121,6 +15443,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_670: # %cond.load585
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15137,6 +15460,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_671: # %cond.load589
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ; CHECK-RV64-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a1
@@ -15153,6 +15477,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, <
 ; CHECK-RV64-NEXT:  .LBB61_672: # %cond.load593
 ; CHECK-RV64-NEXT:    lbu a1, 0(a0)
 ; CHECK-RV64-NEXT:    li a3, 512
+; CHECK-RV64-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT:    vmv8r.v v16, v8
 ;
CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15169,6 +15494,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_673: # %cond.load597 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15185,6 +15511,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_674: # %cond.load601 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15201,6 +15528,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_675: # %cond.load605 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15217,6 +15545,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_676: # %cond.load609 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15233,6 +15562,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_677: # %cond.load613 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15249,6 +15579,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_678: # %cond.load617 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15265,6 +15596,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_679: # %cond.load621 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15281,6 +15613,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_680: # %cond.load625 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15297,6 +15630,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_681: # %cond.load629 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, 
ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15313,6 +15647,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_682: # %cond.load633 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15329,6 +15664,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_683: # %cond.load637 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15345,6 +15681,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_684: # %cond.load641 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15361,6 +15698,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_685: # %cond.load645 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15377,6 +15715,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_686: # %cond.load649 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15393,6 +15732,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_687: # %cond.load653 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15409,6 +15749,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_688: # %cond.load657 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15425,6 +15766,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_689: # %cond.load661 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15441,6 +15783,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_690: # %cond.load665 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ 
-15457,6 +15800,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_691: # %cond.load669 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15473,6 +15817,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_692: # %cond.load673 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15489,6 +15834,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_693: # %cond.load677 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15505,6 +15851,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_694: # %cond.load681 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15521,6 +15868,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_695: # %cond.load685 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15537,6 +15885,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_696: # %cond.load689 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15553,6 +15902,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_697: # %cond.load693 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15569,6 +15919,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_698: # %cond.load697 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15585,6 +15936,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_699: # %cond.load701 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15601,6 +15953,7 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_700: # %cond.load705 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15617,6 +15970,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_701: # %cond.load709 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15633,6 +15987,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_702: # %cond.load713 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15649,6 +16004,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_703: # %cond.load717 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15665,6 +16021,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_704: # %cond.load721 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15681,6 +16038,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_705: # %cond.load725 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15697,6 +16055,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_706: # %cond.load729 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15713,6 +16072,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_707: # %cond.load733 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15729,6 +16089,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_708: # %cond.load737 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15745,6 +16106,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, 
<512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_709: # %cond.load741 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15761,6 +16123,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_710: # %cond.load745 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15777,6 +16140,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_711: # %cond.load749 ; CHECK-RV64-NEXT: lbu a1, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a1 @@ -15809,6 +16173,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_713: # %cond.load765 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15825,6 +16190,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_714: # %cond.load769 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15841,6 +16207,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_715: # %cond.load773 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15857,6 +16224,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_716: # %cond.load777 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15873,6 +16241,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_717: # %cond.load781 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15889,6 +16258,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_718: # %cond.load785 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15905,6 +16275,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: 
.LBB61_719: # %cond.load789 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15921,6 +16292,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_720: # %cond.load793 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15937,6 +16309,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_721: # %cond.load797 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15953,6 +16326,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_722: # %cond.load801 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15969,6 +16343,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_723: # %cond.load805 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -15985,6 +16360,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_724: # %cond.load809 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16001,6 +16377,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_725: # %cond.load813 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16017,6 +16394,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_726: # %cond.load817 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16033,6 +16411,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_727: # %cond.load821 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16049,6 +16428,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_728: # %cond.load825 ; 
CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16065,6 +16445,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_729: # %cond.load829 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16081,6 +16462,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_730: # %cond.load833 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16097,6 +16479,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_731: # %cond.load837 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16113,6 +16496,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_732: # %cond.load841 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16129,6 +16513,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_733: # %cond.load845 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16145,6 +16530,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_734: # %cond.load849 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16161,6 +16547,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_735: # %cond.load853 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16177,6 +16564,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_736: # %cond.load857 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16193,6 +16581,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_737: # %cond.load861 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; 
CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16209,6 +16598,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_738: # %cond.load865 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16225,6 +16615,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_739: # %cond.load869 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16241,6 +16632,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_740: # %cond.load873 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16257,6 +16649,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_741: # %cond.load877 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16273,6 +16666,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_742: # %cond.load881 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16289,6 +16683,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_743: # %cond.load885 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16305,6 +16700,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_744: # %cond.load889 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16321,6 +16717,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_745: # %cond.load893 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16337,6 +16734,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_746: # %cond.load897 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; 
CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16353,6 +16751,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_747: # %cond.load901 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16369,6 +16768,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_748: # %cond.load905 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16385,6 +16785,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_749: # %cond.load909 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16401,6 +16802,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_750: # %cond.load913 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16417,6 +16819,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_751: # %cond.load917 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16433,6 +16836,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_752: # %cond.load921 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16449,6 +16853,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_753: # %cond.load925 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16465,6 +16870,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_754: # %cond.load929 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16481,6 +16887,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_755: # %cond.load933 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, 
ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16497,6 +16904,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_756: # %cond.load937 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16513,6 +16921,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_757: # %cond.load941 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16529,6 +16938,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_758: # %cond.load945 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16545,6 +16955,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_759: # %cond.load949 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16561,6 +16972,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_760: # %cond.load953 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16577,6 +16989,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_761: # %cond.load957 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16593,6 +17006,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_762: # %cond.load961 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16609,6 +17023,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_763: # %cond.load965 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16625,6 +17040,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_764: # %cond.load969 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; 
CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16641,6 +17057,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_765: # %cond.load973 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16657,6 +17074,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_766: # %cond.load977 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16673,6 +17091,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_767: # %cond.load981 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16689,6 +17108,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_768: # %cond.load985 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16705,6 +17125,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_769: # %cond.load989 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16721,6 +17142,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_770: # %cond.load993 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16737,6 +17159,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_771: # %cond.load997 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16753,6 +17176,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_772: # %cond.load1001 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv.s.x v12, a2 @@ -16769,6 +17193,7 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: .LBB61_773: # %cond.load1005 ; CHECK-RV64-NEXT: lbu a2, 0(a0) ; CHECK-RV64-NEXT: li a3, 512 +; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e8, m1, 
ta, ma
 ; CHECK-RV64-NEXT:    vmv.s.x v12, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 869478a1efa78d..5a974f88ed8f38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -13,6 +13,7 @@ define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
 define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv8i32_nxv4i32_4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
   %c = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
@@ -30,6 +31,7 @@ define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec) {
 define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv8i32_nxv2i32_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 2)
@@ -39,6 +41,7 @@ define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec) {
 define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv8i32_nxv2i32_4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
@@ -48,6 +51,7 @@ define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec) {
 define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv8i32_nxv2i32_6:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v11
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 6)
@@ -65,6 +69,7 @@ define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec)
 define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv8i32_8:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
   %c = call <vscale x 8 x i32> @llvm.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
@@ -82,6 +87,7 @@ define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec)
 define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv4i32_4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
   %c = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 4)
@@ -91,6 +97,7 @@ define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec)
 define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv4i32_8:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
   %c = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
@@ -100,6 +107,7 @@ define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec)
 define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv4i32_12:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv2r.v v8, v14
 ; CHECK-NEXT:    ret
   %c = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 12)
@@ -117,6 +125,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 2)
@@ -126,6 +135,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 4)
@@ -135,6 +145,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_6:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v11
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 6)
@@ -144,6 +155,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_8:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v12
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
@@ -153,6 +165,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_10:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v13
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 10)
@@ -162,6 +175,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_12:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v14
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 12)
@@ -171,6 +185,7 @@ define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec)
 define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv2i32_14:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v15
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 14)
@@ -224,6 +239,7 @@ define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_15(<vscale x 16 x i32> %vec)
 define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_2(<vscale x 16 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv16i32_nxv1i32_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %c = call <vscale x 1 x i32> @llvm.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 2)
@@ -287,6 +303,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_6(<vscale x 32 x i8> %vec) {
 define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_8(<vscale x 32 x i8> %vec) {
 ; CHECK-LABEL: extract_nxv32i8_nxv2i8_8:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x i8> @llvm.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8> %vec, i64 8)
@@ -357,6 +374,7 @@ define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_2(<vscale x 16 x half> %vec
 define <vscale x 2 x half> @extract_nxv2f16_nxv16f16_4(<vscale x 16 x half> %vec) {
 ; CHECK-LABEL: extract_nxv2f16_nxv16f16_4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x half> @llvm.vector.extract.nxv2f16.nxv16f16(<vscale x 16 x half> %vec, i64 4)
@@ -504,6 +522,7 @@ define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv16bf16_2(<vscale x 16 x bfloat
 define <vscale x 2 x bfloat> @extract_nxv2bf16_nxv16bf16_4(<vscale x 16 x bfloat> %vec) {
 ; CHECK-LABEL: extract_nxv2bf16_nxv16bf16_4:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %c = call <vscale x 2 x bfloat> @llvm.vector.extract.nxv2bf16.nxv16bf16(<vscale x 16 x bfloat> %vec, i64 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
index ce83e2d8a62206..1a5ca429b531fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
@@ -16,10 +16,10 @@ define <512 x i8> @single_source(<512 x i8> %a) {
 ; CHECK-NEXT:    addi s0, sp, 1536
 ; CHECK-NEXT:    .cfi_def_cfa s0, 0
 ; CHECK-NEXT:    andi sp, sp, -512
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv8r.v v16, v8
 ; CHECK-NEXT:    li a0, 512
 ; CHECK-NEXT:    addi a1, sp, 512
-; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a2, v16
 ; CHECK-NEXT:    vslidedown.vi v24, v16, 5
 ; CHECK-NEXT:    li a3, 432
@@ -104,10 +104,10 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
 ; CHECK-NEXT:    addi s0, sp, 1536
 ; CHECK-NEXT:    .cfi_def_cfa s0, 0
 ; CHECK-NEXT:    andi sp, sp, -512
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    li a0, 512
 ; CHECK-NEXT:    addi a1, sp, 512
-; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v0, v24, 5
 ; CHECK-NEXT:    vmv.x.s a2, v24
 ; CHECK-NEXT:    li a3, 432
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 3eb5d36b4896a7..226cce4dbaf09f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1659,6 +1659,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
 ; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
+; RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; RV32-NEXT:    vmv8r.v v24, v8
 ; RV32-NEXT:    lui a2, 1044480
 ; RV32-NEXT:    lui a3, 61681
@@ -2055,6 +2056,7 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
 ; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
+; RV32-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; RV32-NEXT:    vmv8r.v v24, v8
 ; RV32-NEXT:    lui a2, 1044480
 ; RV32-NEXT:    lui a3, 61681
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index ee953a66a004f3..e2ce999eb0f77f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -180,6 +180,7 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vmv8r.v v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 73e148edbe2d67..d83540281b415b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -180,6 +180,7 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv8r.v v24, v8
 ; CHECK-NEXT:    li a1, 2
 ; CHECK-NEXT:    vmv8r.v v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 511242aa677c2a..d40c672cdb6a1c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -194,8 +194,8 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_ceil_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vmv1r.v v9, v0
 ;
ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -261,6 +261,7 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) @@ -280,8 +281,8 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ; ZVFHMIN-LABEL: vp_ceil_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -431,6 +432,7 @@ declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32) define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -475,6 +477,7 @@ declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32) define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -561,6 +564,7 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32) define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) @@ -605,6 +609,7 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) @@ -649,6 +654,7 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) @@ -693,6 +699,7 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) @@ -743,6 +750,7 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # 
sp + 16 + 16 * vlenb +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -750,7 +758,6 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 5e73e6df9170c2..bbf97891fd9b10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1796,6 +1796,7 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv8r.v v24, v16 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 02e99ea513e69b..9bce8d4960e89f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -194,8 +194,8 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vp_floor_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v9, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v9, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -261,6 +261,7 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32) define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1) @@ -280,8 +281,8 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vp_floor_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -431,6 +432,7 @@ declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32) define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -475,6 +477,7 @@ declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32) define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -561,6 +564,7 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32) 
define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1) @@ -605,6 +609,7 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32) define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1) @@ -649,6 +654,7 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32) define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) @@ -693,6 +699,7 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32) define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) @@ -743,6 +750,7 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -750,7 +758,6 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index f43934afc370df..ef6a429d391a62 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -13,6 +13,7 @@ declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v2f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -26,8 +27,8 @@ define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -83,6 +84,7 @@ declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 
zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v4f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -96,8 +98,8 @@ define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -153,6 +155,7 @@ declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -166,8 +169,8 @@ define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t @@ -225,6 +228,7 @@ declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -240,8 +244,8 @@ define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t @@ -299,6 +303,7 @@ declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -332,6 +337,7 @@ declare <4 x float> @llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -365,6 +371,7 @@ declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -400,6 +407,7 @@ declare <16 x 
float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -435,6 +443,7 @@ declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -468,6 +477,7 @@ declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -503,6 +513,7 @@ declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -544,6 +555,7 @@ define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t @@ -595,6 +607,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 @@ -608,7 +621,6 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 7067cc21ab56d5..ca6b86f8325c4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -13,6 +13,7 @@ declare <2 x half> @llvm.vp.minimum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v2f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli 
zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -26,8 +27,8 @@ define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -83,6 +84,7 @@ declare <4 x half> @llvm.vp.minimum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v4f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -96,8 +98,8 @@ define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -153,6 +155,7 @@ declare <8 x half> @llvm.vp.minimum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -166,8 +169,8 @@ define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t @@ -225,6 +228,7 @@ declare <16 x half> @llvm.vp.minimum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -240,8 +244,8 @@ define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t @@ -299,6 +303,7 @@ declare <2 x float> @llvm.vp.minimum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmin_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -332,6 +337,7 @@ declare <4 x float> @llvm.vp.minimum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vfmin_vv_v4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -365,6 +371,7 @@ declare <8 x float> @llvm.vp.minimum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -400,6 +407,7 @@ declare <16 x float> @llvm.vp.minimum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmin_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -435,6 +443,7 @@ declare <2 x double> @llvm.vp.minimum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmin_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -468,6 +477,7 @@ declare <4 x double> @llvm.vp.minimum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmin_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -503,6 +513,7 @@ declare <8 x double> @llvm.vp.minimum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmin_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -544,6 +555,7 @@ define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t @@ -595,6 +607,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 @@ -608,7 +621,6 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli 
a1, a1, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index e4b8e9debad271..56da941051908e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -38,8 +38,8 @@ define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) { define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; V128-LABEL: interleave_v2f64: ; V128: # %bb.0: -; V128-NEXT: vmv1r.v v12, v9 ; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; V128-NEXT: vmv1r.v v12, v9 ; V128-NEXT: vid.v v9 ; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vsrl.vi v14, v9, 1 @@ -242,12 +242,12 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: slli a0, a0, 3 ; V128-NEXT: sub sp, sp, a0 ; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vmv8r.v v24, v16 ; V128-NEXT: vmv8r.v v16, v8 ; V128-NEXT: vmv8r.v v8, v24 ; V128-NEXT: addi a0, sp, 16 ; V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vslidedown.vi v0, v24, 16 ; V128-NEXT: li a0, -1 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index e64c7c87132eee..582706e4dfa184 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -97,9 +97,9 @@ declare <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double>, <32 x i1>, i3 define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_v32f32_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll index a68dc11f3d21e7..fc7cd94ca3de85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -712,8 +712,8 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 1fbc8dfd688c4b..9023d9732ef182 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -133,6 +133,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v4i32_0(<vscale x 8 x i32> %vec, <4 x ; ; VLS-LABEL: insert_nxv8i32_v4i32_0: ; VLS: # %bb.0: +; VLS-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; VLS-NEXT: vmv1r.v v8, v9 ; VLS-NEXT:
ret %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 8 x i32> %vec, <4 x i32> %subvec, i64 0) @@ -143,6 +144,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v4i32_0(<vscale x 8 x i32> %vec, <4 x define <4 x i32> @insert_v4i32_v4i32_0(<4 x i32> %vec, <4 x i32> %subvec) { ; CHECK-LABEL: insert_v4i32_v4i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %v = call <4 x i32> @llvm.vector.insert.v4i32.v4i32(<4 x i32> %vec, <4 x i32> %subvec, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 66af5718fb9dc5..346e641ae06e22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -51,8 +51,8 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) { define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V128-LABEL: interleave_v2i64: ; V128: # %bb.0: -; V128-NEXT: vmv1r.v v12, v9 ; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; V128-NEXT: vmv1r.v v12, v9 ; V128-NEXT: vid.v v9 ; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vsrl.vi v14, v9, 1 @@ -411,12 +411,12 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: slli a0, a0, 3 ; V128-NEXT: sub sp, sp, a0 ; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vmv8r.v v24, v16 ; V128-NEXT: vmv8r.v v16, v8 ; V128-NEXT: vmv8r.v v8, v24 ; V128-NEXT: addi a0, sp, 16 ; V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vslidedown.vi v0, v24, 16 ; V128-NEXT: li a0, -1 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 6cc3f7e76797bd..722a0aff8d9e44 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -556,11 +556,13 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV32-LABEL: mgather_falsemask_v4i8: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4i8: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; @@ -733,13 +735,13 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB12_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB12_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB12_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB12_15 ; RV64ZVE32F-NEXT: .LBB12_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB12_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB12_16 ; RV64ZVE32F-NEXT: .LBB12_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_9 @@ -756,14 +758,31 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT:
vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB12_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB12_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lbu a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB12_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB12_16 -; RV64ZVE32F-NEXT: .LBB12_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB12_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lbu a0, 0(a0) +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB12_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB12_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -772,7 +791,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_6 -; RV64ZVE32F-NEXT: .LBB12_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -783,7 +802,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_7 -; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -793,26 +812,6 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_8 ; RV64ZVE32F-NEXT: j .LBB12_9 -; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB12_11 -; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru) ret <8 x i8> %v @@ -1253,11 +1252,13 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) { define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, 
<4 x i16> %passthru) { ; RV32-LABEL: mgather_falsemask_v4i16: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4i16: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; @@ -1435,13 +1436,13 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB23_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB23_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB23_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB23_15 ; RV64ZVE32F-NEXT: .LBB23_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB23_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB23_16 ; RV64ZVE32F-NEXT: .LBB23_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_9 @@ -1460,14 +1461,35 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB23_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB23_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB23_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB23_16 -; RV64ZVE32F-NEXT: .LBB23_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB23_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB23_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB23_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -1478,7 +1500,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_6 -; RV64ZVE32F-NEXT: .LBB23_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1491,7 +1513,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_7 -; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load10 ; RV64ZVE32F-NEXT: 
vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -1504,30 +1526,6 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_8 ; RV64ZVE32F-NEXT: j .LBB23_9 -; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB23_11 -; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) ret <8 x i16> %v @@ -1587,13 +1585,13 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB24_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB24_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB24_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB24_15 ; RV64ZVE32F-NEXT: .LBB24_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB24_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB24_16 ; RV64ZVE32F-NEXT: .LBB24_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_9 @@ -1612,14 +1610,35 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB24_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB24_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB24_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB24_16 -; RV64ZVE32F-NEXT: .LBB24_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB24_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; 
RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB24_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB24_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -1630,7 +1649,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_6 -; RV64ZVE32F-NEXT: .LBB24_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1643,7 +1662,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_7 -; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -1656,30 +1675,6 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8 ; RV64ZVE32F-NEXT: j .LBB24_9 -; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB24_11 -; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) @@ -1740,13 +1735,13 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB25_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB25_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB25_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB25_15 ; RV64ZVE32F-NEXT: .LBB25_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB25_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB25_16 ; RV64ZVE32F-NEXT: .LBB25_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_9 @@ -1766,14 +1761,37 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, 
ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB25_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB25_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: andi a2, a2, 255 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB25_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB25_16 -; RV64ZVE32F-NEXT: .LBB25_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB25_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: andi a1, a1, 255 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB25_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB25_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -1785,7 +1803,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_6 -; RV64ZVE32F-NEXT: .LBB25_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1799,7 +1817,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_7 -; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -1813,32 +1831,6 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8 ; RV64ZVE32F-NEXT: j .LBB25_9 -; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: andi a2, a2, 255 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB25_11 -; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: andi a1, a1, 255 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, 
ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) @@ -1896,13 +1888,13 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB26_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB26_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB26_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB26_15 ; RV64ZVE32F-NEXT: .LBB26_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB26_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB26_16 ; RV64ZVE32F-NEXT: .LBB26_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_9 @@ -1920,14 +1912,33 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB26_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB26_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB26_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB26_16 -; RV64ZVE32F-NEXT: .LBB26_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB26_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB26_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB26_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -1937,7 +1948,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_6 -; RV64ZVE32F-NEXT: .LBB26_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1949,7 +1960,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_7 -; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, 
tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -1960,28 +1971,6 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_8 ; RV64ZVE32F-NEXT: j .LBB26_9 -; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB26_11 -; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru) ret <8 x i16> %v @@ -2311,11 +2300,13 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) { ; RV32-LABEL: mgather_falsemask_v4i32: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4i32: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; @@ -2492,13 +2483,13 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB35_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB35_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 ; RV64ZVE32F-NEXT: .LBB35_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB35_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB35_16 ; RV64ZVE32F-NEXT: .LBB35_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9 @@ -2517,14 +2508,35 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB35_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB35_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB35_16 -; RV64ZVE32F-NEXT: .LBB35_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB35_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli 
zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB35_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB35_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -2535,7 +2547,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 -; RV64ZVE32F-NEXT: .LBB35_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -2548,7 +2560,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 -; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2561,30 +2573,6 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 ; RV64ZVE32F-NEXT: j .LBB35_9 -; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB35_11 -; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) ret <8 x i32> %v @@ -2643,13 +2631,13 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB36_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB36_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB36_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB36_15 ; RV64ZVE32F-NEXT: .LBB36_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB36_14 
+; RV64ZVE32F-NEXT: bnez a2, .LBB36_16 ; RV64ZVE32F-NEXT: .LBB36_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_9 @@ -2668,14 +2656,35 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB36_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB36_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB36_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB36_16 -; RV64ZVE32F-NEXT: .LBB36_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB36_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB36_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB36_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -2686,7 +2695,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6 -; RV64ZVE32F-NEXT: .LBB36_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -2699,7 +2708,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7 -; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2712,30 +2721,6 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8 ; RV64ZVE32F-NEXT: j .LBB36_9 -; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB36_11 -; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; 
RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) @@ -2798,13 +2783,13 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB37_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB37_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB37_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB37_15 ; RV64ZVE32F-NEXT: .LBB37_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB37_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB37_16 ; RV64ZVE32F-NEXT: .LBB37_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_9 @@ -2824,14 +2809,37 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB37_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB37_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: andi a2, a2, 255 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB37_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB37_16 -; RV64ZVE32F-NEXT: .LBB37_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB37_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: andi a1, a1, 255 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB37_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB37_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -2843,7 +2851,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6 -; RV64ZVE32F-NEXT: .LBB37_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: 
vmv.x.s a2, v8 @@ -2857,7 +2865,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7 -; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -2871,32 +2879,6 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8 ; RV64ZVE32F-NEXT: j .LBB37_9 -; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: andi a2, a2, 255 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB37_11 -; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: andi a1, a1, 255 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) @@ -2957,13 +2939,13 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB38_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB38_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB38_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB38_15 ; RV64ZVE32F-NEXT: .LBB38_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB38_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB38_16 ; RV64ZVE32F-NEXT: .LBB38_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_9 @@ -2982,14 +2964,35 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB38_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB38_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB38_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB38_16 -; RV64ZVE32F-NEXT: .LBB38_11: # %else20 +; RV64ZVE32F-NEXT: 
beqz a1, .LBB38_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB38_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB38_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -3000,7 +3003,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6 -; RV64ZVE32F-NEXT: .LBB38_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -3013,7 +3016,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7 -; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -3026,30 +3029,6 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8 ; RV64ZVE32F-NEXT: j .LBB38_9 -; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB38_11 -; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) ret <8 x i32> %v @@ -3109,13 +3088,13 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB39_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB39_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB39_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB39_15 ; 
RV64ZVE32F-NEXT: .LBB39_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB39_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB39_16 ; RV64ZVE32F-NEXT: .LBB39_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_9 @@ -3134,14 +3113,35 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB39_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB39_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB39_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB39_16 -; RV64ZVE32F-NEXT: .LBB39_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB39_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB39_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB39_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -3152,7 +3152,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6 -; RV64ZVE32F-NEXT: .LBB39_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -3165,7 +3165,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7 -; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -3178,30 +3178,6 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8 ; RV64ZVE32F-NEXT: j .LBB39_9 -; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB39_11 -; RV64ZVE32F-NEXT: .LBB39_16: 
# %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) @@ -3265,13 +3241,13 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_12 +; RV64ZVE32F-NEXT: bnez a3, .LBB40_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_13 +; RV64ZVE32F-NEXT: bnez a3, .LBB40_15 ; RV64ZVE32F-NEXT: .LBB40_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB40_16 ; RV64ZVE32F-NEXT: .LBB40_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_9 @@ -3291,14 +3267,37 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a3, .LBB40_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: and a3, a3, a1 +; RV64ZVE32F-NEXT: slli a3, a3, 2 +; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB40_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB40_16 -; RV64ZVE32F-NEXT: .LBB40_11: # %else20 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: and a1, a2, a1 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB40_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB40_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 @@ -3310,7 +3309,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_6 -; RV64ZVE32F-NEXT: .LBB40_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB40_15: # 
%cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -3324,7 +3323,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_7 -; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -3338,32 +3337,6 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8 ; RV64ZVE32F-NEXT: j .LBB40_9 -; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB40_11 -; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: and a1, a2, a1 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) @@ -3420,13 +3393,13 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB41_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB41_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB41_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB41_15 ; RV64ZVE32F-NEXT: .LBB41_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB41_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB41_16 ; RV64ZVE32F-NEXT: .LBB41_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_9 @@ -3444,35 +3417,54 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB41_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB41_16 -; RV64ZVE32F-NEXT: .LBB41_11: # %else20 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB41_12: # %cond.load4 +; RV64ZVE32F-NEXT: beqz a2, .LBB41_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: 
vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB41_6 -; RV64ZVE32F-NEXT: .LBB41_13: # %cond.load7 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB41_11: # %else17 +; RV64ZVE32F-NEXT: andi a1, a1, -128 +; RV64ZVE32F-NEXT: beqz a1, .LBB41_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lw a0, 0(a0) +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB41_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma +; RV64ZVE32F-NEXT: vmv2r.v v8, v10 +; RV64ZVE32F-NEXT: ret +; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load4 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: beqz a2, .LBB41_6 +; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load7 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_7 -; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -3483,28 +3475,6 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_8 ; RV64ZVE32F-NEXT: j .LBB41_9 -; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB41_11 -; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) ret <8 x i32> %v @@ -3822,11 +3792,13 @@ define <4 x i64> 
@mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) { ; RV32V-LABEL: mgather_falsemask_v4i64: ; RV32V: # %bb.0: +; RV32V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32V-NEXT: vmv2r.v v8, v10 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4i64: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv2r.v v8, v10 ; RV64V-NEXT: ret ; @@ -7113,11 +7085,13 @@ define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %pass define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) { ; RV32-LABEL: mgather_falsemask_v4bf16: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4bf16: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; @@ -7295,13 +7269,13 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB64_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB64_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB64_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB64_15 ; RV64ZVE32F-NEXT: .LBB64_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB64_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB64_16 ; RV64ZVE32F-NEXT: .LBB64_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_9 @@ -7320,14 +7294,35 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB64_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB64_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB64_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB64_16 -; RV64ZVE32F-NEXT: .LBB64_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB64_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB64_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB64_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7338,7 +7333,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: vslideup.vi 
v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_6 -; RV64ZVE32F-NEXT: .LBB64_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7351,7 +7346,7 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_7 -; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -7364,30 +7359,6 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_8 ; RV64ZVE32F-NEXT: j .LBB64_9 -; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB64_11 -; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) ret <8 x bfloat> %v @@ -7447,13 +7418,13 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB65_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB65_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB65_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB65_15 ; RV64ZVE32F-NEXT: .LBB65_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB65_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB65_16 ; RV64ZVE32F-NEXT: .LBB65_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_9 @@ -7472,14 +7443,35 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB65_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB65_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: 
vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB65_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB65_16 -; RV64ZVE32F-NEXT: .LBB65_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB65_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB65_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB65_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7490,7 +7482,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_6 -; RV64ZVE32F-NEXT: .LBB65_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7503,7 +7495,7 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_7 -; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -7516,30 +7508,6 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_8 ; RV64ZVE32F-NEXT: j .LBB65_9 -; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB65_11 -; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) @@ -7600,13 +7568,13 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; 
RV64ZVE32F-NEXT: bnez a2, .LBB66_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB66_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB66_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB66_15 ; RV64ZVE32F-NEXT: .LBB66_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB66_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB66_16 ; RV64ZVE32F-NEXT: .LBB66_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_9 @@ -7626,14 +7594,37 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB66_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB66_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: andi a2, a2, 255 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB66_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB66_16 -; RV64ZVE32F-NEXT: .LBB66_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB66_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: andi a1, a1, 255 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB66_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB66_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -7645,7 +7636,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_6 -; RV64ZVE32F-NEXT: .LBB66_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7659,7 +7650,7 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_7 -; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -7673,32 +7664,6 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_8 ; RV64ZVE32F-NEXT: j .LBB66_9 -; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: andi a2, a2, 255 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, 
a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB66_11 -; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: andi a1, a1, 255 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) @@ -7756,13 +7721,13 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB67_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB67_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB67_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB67_15 ; RV64ZVE32F-NEXT: .LBB67_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB67_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB67_16 ; RV64ZVE32F-NEXT: .LBB67_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_9 @@ -7780,14 +7745,33 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB67_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB67_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 1 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-NEXT: .LBB67_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB67_16 -; RV64ZVE32F-NEXT: .LBB67_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB67_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 1 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: lh a0, 0(a0) +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-NEXT: .LBB67_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB67_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7797,7 +7781,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; 
RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_6 -; RV64ZVE32F-NEXT: .LBB67_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7809,7 +7793,7 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_7 -; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: slli a2, a2, 1 @@ -7820,28 +7804,6 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_8 ; RV64ZVE32F-NEXT: j .LBB67_9 -; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 1 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB67_11 -; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 1 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru) ret <8 x bfloat> %v @@ -8135,11 +8097,13 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) { ; RV32-LABEL: mgather_falsemask_v4f16: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4f16: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; @@ -8410,13 +8374,13 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_12 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_13 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_16 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9 @@ -8435,14 +8399,35 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; 
RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15 -; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_11 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_11: # %else17 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB74_16 -; RV64ZVE32F-ZVFH-NEXT: .LBB74_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB74_13 +; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_13: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFH-NEXT: ret -; RV64ZVE32F-ZVFH-NEXT: .LBB74_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load4 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 @@ -8453,7 +8438,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6 -; RV64ZVE32F-ZVFH-NEXT: .LBB74_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load7 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -8466,7 +8451,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_7 -; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load10 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 @@ -8479,30 +8464,6 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_8 ; RV64ZVE32F-ZVFH-NEXT: j .LBB74_9 -; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load16 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB74_11 -; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load19 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-ZVFH-NEXT: 
vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFH-NEXT: ret ; ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16: ; RV64ZVE32F-ZVFHMIN: # %bb.0: @@ -8537,13 +8498,13 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_12 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_13 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_16 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9 @@ -8562,14 +8523,35 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15 -; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_11 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %else17 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB74_16 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB74_13 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFHMIN-NEXT: ret -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load4 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 @@ -8580,7 +8562,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> 
%idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load7 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -8593,7 +8575,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_7 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load10 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 @@ -8606,30 +8588,6 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_8 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_9 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) -; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB74_11 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) -; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFHMIN-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) ret <8 x half> %v @@ -8689,13 +8647,13 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_12 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_13 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_16 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9 @@ -8714,14 +8672,35 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 
-; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15 -; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_11 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_11: # %else17 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB75_16 -; RV64ZVE32F-ZVFH-NEXT: .LBB75_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB75_13 +; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_13: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFH-NEXT: ret -; RV64ZVE32F-ZVFH-NEXT: .LBB75_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load4 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 @@ -8732,7 +8711,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6 -; RV64ZVE32F-ZVFH-NEXT: .LBB75_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load7 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -8745,7 +8724,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_7 -; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load10 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 @@ -8758,30 +8737,6 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_8 ; RV64ZVE32F-ZVFH-NEXT: j .LBB75_9 -; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load16 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB75_11 -; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load19 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFH-NEXT: 
vmv.x.s a1, v8 -; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFH-NEXT: ret ; ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16: ; RV64ZVE32F-ZVFHMIN: # %bb.0: @@ -8816,13 +8771,13 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_12 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_13 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_16 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9 @@ -8841,14 +8796,35 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15 -; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_11 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %else17 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB75_16 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB75_13 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFHMIN-NEXT: ret -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load4 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 @@ -8859,7 +8835,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: 
vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load7 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -8872,7 +8848,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_7 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load10 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 @@ -8885,30 +8861,6 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_8 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_9 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) -; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB75_11 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) -; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFHMIN-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) @@ -8969,13 +8921,13 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_12 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_13 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_16 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9 @@ -8995,14 +8947,37 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 -; 
RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15 -; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_11 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else17 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB76_16 -; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_13 +; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFH-NEXT: ret -; RV64ZVE32F-ZVFH-NEXT: .LBB76_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load4 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 @@ -9014,7 +8989,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6 -; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load7 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -9028,7 +9003,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_7 -; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load10 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 @@ -9042,32 +9017,6 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_8 ; RV64ZVE32F-ZVFH-NEXT: j .LBB76_9 -; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load16 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255 -; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_11 -; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load19 -; 
RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255 -; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFH-NEXT: ret ; ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV64ZVE32F-ZVFHMIN: # %bb.0: @@ -9104,13 +9053,13 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_12 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_13 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_16 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9 @@ -9130,14 +9079,37 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15 -; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_11 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else17 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB76_16 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_13 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFHMIN-NEXT: ret -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load4 ; 
RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 @@ -9149,7 +9121,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load7 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -9163,7 +9135,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_7 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load10 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 @@ -9177,32 +9149,6 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_8 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_9 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) -; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_11 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) -; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFHMIN-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i16> %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) @@ -9260,13 +9206,13 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_12 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_13 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14 +; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_16 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11 
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9 @@ -9284,14 +9230,33 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15 -; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_11 +; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_11: # %else17 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB77_16 -; RV64ZVE32F-ZVFH-NEXT: .LBB77_11: # %else20 +; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB77_13 +; RV64ZVE32F-ZVFH-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) +; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_13: # %else20 +; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFH-NEXT: ret -; RV64ZVE32F-ZVFH-NEXT: .LBB77_12: # %cond.load4 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load4 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 @@ -9301,7 +9266,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6 -; RV64ZVE32F-ZVFH-NEXT: .LBB77_13: # %cond.load7 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load7 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -9313,7 +9278,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_7 -; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load10 +; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load10 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 @@ -9324,28 +9289,6 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_8 ; RV64ZVE32F-ZVFH-NEXT: j .LBB77_9 -; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load16 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB77_11 -; RV64ZVE32F-ZVFH-NEXT: 
.LBB77_16: # %cond.load19 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFH-NEXT: ret ; ; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16: ; RV64ZVE32F-ZVFHMIN: # %bb.0: @@ -9379,13 +9322,13 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_12 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_13 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14 +; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_16 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9 @@ -9403,14 +9346,33 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15 -; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_11 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %else17 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB77_16 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB77_13 +; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 +; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) +; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %else20 +; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-ZVFHMIN-NEXT: ret -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_12: # %cond.load4 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load4 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 @@ -9420,7 +9382,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2 ; 
RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %cond.load7 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load7 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -9432,7 +9394,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_7 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load10 +; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load10 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 @@ -9443,28 +9405,6 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_8 ; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_9 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 -; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB77_11 -; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1 -; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1 -; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) -; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7 -; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9 -; RV64ZVE32F-ZVFHMIN-NEXT: ret %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru) ret <8 x half> %v @@ -9666,11 +9606,13 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) { ; RV32-LABEL: mgather_falsemask_v4f32: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_falsemask_v4f32: ; RV64V: # %bb.0: +; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64V-NEXT: vmv1r.v v8, v10 ; RV64V-NEXT: ret ; @@ -9847,13 +9789,13 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 ; RV64ZVE32F-NEXT: .LBB84_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB84_16 ; RV64ZVE32F-NEXT: .LBB84_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_9 @@ -9872,14 +9814,35 @@ 
define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB84_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB84_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB84_16 -; RV64ZVE32F-NEXT: .LBB84_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB84_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB84_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB84_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -9890,7 +9853,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 -; RV64ZVE32F-NEXT: .LBB84_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -9903,7 +9866,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_7 -; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -9916,30 +9879,6 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_8 ; RV64ZVE32F-NEXT: j .LBB84_9 -; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB84_11 -; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: 
vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) ret <8 x float> %v @@ -9998,13 +9937,13 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_15 ; RV64ZVE32F-NEXT: .LBB85_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB85_16 ; RV64ZVE32F-NEXT: .LBB85_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_9 @@ -10023,14 +9962,35 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB85_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB85_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB85_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB85_16 -; RV64ZVE32F-NEXT: .LBB85_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB85_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB85_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB85_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -10041,7 +10001,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_6 -; RV64ZVE32F-NEXT: .LBB85_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10054,7 +10014,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_7 
-; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -10067,30 +10027,6 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_8 ; RV64ZVE32F-NEXT: j .LBB85_9 -; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB85_11 -; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) @@ -10153,13 +10089,13 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB86_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB86_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_15 ; RV64ZVE32F-NEXT: .LBB86_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB86_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB86_16 ; RV64ZVE32F-NEXT: .LBB86_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_9 @@ -10179,14 +10115,37 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB86_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB86_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: andi a2, a2, 255 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB86_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB86_16 -; RV64ZVE32F-NEXT: .LBB86_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB86_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: andi a1, a1, 255 +; 
RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB86_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: andi a2, a2, 255 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -10198,7 +10157,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_6 -; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10212,7 +10171,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_7 -; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: andi a2, a2, 255 @@ -10226,32 +10185,6 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_8 ; RV64ZVE32F-NEXT: j .LBB86_9 -; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: andi a2, a2, 255 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB86_11 -; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: andi a1, a1, 255 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i8> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) @@ -10312,13 +10245,13 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 ; RV64ZVE32F-NEXT: .LBB87_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; 
RV64ZVE32F-NEXT: bnez a2, .LBB87_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB87_16 ; RV64ZVE32F-NEXT: .LBB87_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9 @@ -10337,14 +10270,35 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB87_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB87_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB87_16 -; RV64ZVE32F-NEXT: .LBB87_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB87_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB87_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB87_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -10355,7 +10309,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6 -; RV64ZVE32F-NEXT: .LBB87_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10368,7 +10322,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7 -; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -10381,30 +10335,6 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_8 ; RV64ZVE32F-NEXT: j .LBB87_9 -; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB87_11 -; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, 
e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) ret <8 x float> %v @@ -10464,13 +10394,13 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_13 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 ; RV64ZVE32F-NEXT: .LBB88_6: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB88_16 ; RV64ZVE32F-NEXT: .LBB88_7: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9 @@ -10489,14 +10419,35 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a2, .LBB88_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 2 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB88_11: # %else17 ; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB88_16 -; RV64ZVE32F-NEXT: .LBB88_11: # %else20 +; RV64ZVE32F-NEXT: beqz a1, .LBB88_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB88_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB88_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -10507,7 +10458,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6 -; RV64ZVE32F-NEXT: .LBB88_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: 
vmv.x.s a2, v8 @@ -10520,7 +10471,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7 -; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: slli a2, a2, 2 @@ -10533,30 +10484,6 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_8 ; RV64ZVE32F-NEXT: j .LBB88_9 -; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: slli a2, a2, 2 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB88_11 -; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = sext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) @@ -10620,13 +10547,13 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_12 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_13 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_15 ; RV64ZVE32F-NEXT: .LBB89_6: # %else8 ; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB89_16 ; RV64ZVE32F-NEXT: .LBB89_7: # %else11 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_9 @@ -10646,14 +10573,37 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_15 -; RV64ZVE32F-NEXT: # %bb.10: # %else17 +; RV64ZVE32F-NEXT: beqz a3, .LBB89_11 +; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: and a3, a3, a1 +; RV64ZVE32F-NEXT: slli a3, a3, 2 +; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 +; RV64ZVE32F-NEXT: .LBB89_11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB89_16 -; RV64ZVE32F-NEXT: .LBB89_11: # %else20 +; RV64ZVE32F-NEXT: beqz a2, 
.LBB89_13 +; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: and a1, a2, a1 +; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: add a0, a0, a1 +; RV64ZVE32F-NEXT: flw fa5, 0(a0) +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 +; RV64ZVE32F-NEXT: .LBB89_13: # %else20 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB89_12: # %cond.load4 +; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 @@ -10665,7 +10615,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_6 -; RV64ZVE32F-NEXT: .LBB89_13: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -10679,7 +10629,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_7 -; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 ; RV64ZVE32F-NEXT: and a3, a3, a1 @@ -10693,32 +10643,6 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_8 ; RV64ZVE32F-NEXT: j .LBB89_9 -; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 -; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_11 -; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load19 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: and a1, a2, a1 -; RV64ZVE32F-NEXT: slli a1, a1, 2 -; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 -; RV64ZVE32F-NEXT: vmv2r.v v8, v10 -; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) @@ -10775,13 +10699,13 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB90_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB90_14 ; 
RV64ZVE32F-NEXT: # %bb.5: # %else5
 ; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_13
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
 ; RV64ZVE32F-NEXT: .LBB90_6: # %else8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_14
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_16
 ; RV64ZVE32F-NEXT: .LBB90_7: # %else11
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_9
@@ -10799,14 +10723,33 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
 ; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
-; RV64ZVE32F-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_11
+; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 2
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: flw fa5, 0(a2)
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
+; RV64ZVE32F-NEXT: .LBB90_11: # %else17
 ; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_16
-; RV64ZVE32F-NEXT: .LBB90_11: # %else20
+; RV64ZVE32F-NEXT: beqz a1, .LBB90_13
+; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: add a0, a0, a1
+; RV64ZVE32F-NEXT: flw fa5, 0(a0)
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
+; RV64ZVE32F-NEXT: .LBB90_13: # %else20
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
 ; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB90_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10816,7 +10759,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
 ; RV64ZVE32F-NEXT: andi a2, a1, 8
 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_6
-; RV64ZVE32F-NEXT: .LBB90_13: # %cond.load7
+; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10827,7 +10770,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_7
-; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load10
+; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -10838,28 +10781,6 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB90_8
 ; RV64ZVE32F-NEXT: j .LBB90_9
-; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load16
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: slli a2, a2, 2
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
-; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_11
-; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: slli a1, a1, 2
-; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
-; RV64ZVE32F-NEXT: vmv2r.v v8, v10
-; RV64ZVE32F-NEXT: ret
 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
 ret <8 x float> %v
@@ -11135,11 +11056,13 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt
 define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
 ; RV32V-LABEL: mgather_falsemask_v4f64:
 ; RV32V: # %bb.0:
+; RV32V-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV32V-NEXT: vmv2r.v v8, v10
 ; RV32V-NEXT: ret
 ;
 ; RV64V-LABEL: mgather_falsemask_v4f64:
 ; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64V-NEXT: vmv2r.v v8, v10
 ; RV64V-NEXT: ret
 ;
@@ -13700,6 +13623,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
 ; RV64ZVE32F-NEXT: .LBB107_24: # %else44
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB107_25: # %cond.load4
@@ -14086,6 +14010,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
 ; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
 ; RV64ZVE32F-NEXT: .LBB108_48: # %else92
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB108_49: # %cond.load4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index e0cf39c75da240..e5f3e22361f635 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -318,6 +318,7 @@ define <128 x i16> @masked_load_v128i16(ptr %a, <128 x i1> %mask) {
 define <256 x i8> @masked_load_v256i8(ptr %a, <256 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v256i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v8
 ; CHECK-NEXT: li a1, 128
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index 46c2033d28b387..e614307f35dcc0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -135,6 +135,7 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
@@ -263,6 +264,7 @@ declare <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -307,6 +309,7 @@ declare <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -393,6 +396,7 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
@@ -437,6 +441,7 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
@@ -481,6 +486,7 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -525,6 +531,7 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -569,9 +576,9 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v6, v0
 ; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v7, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
index ad358d73202402..ddb16fe11719a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
@@ -23,6 +23,7 @@ declare i1 @llvm.vp.reduce.or.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 define zeroext i1 @vpreduce_or_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_v1i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -39,6 +40,7 @@ declare i1 @llvm.vp.reduce.xor.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 define zeroext i1 @vpreduce_xor_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_v1i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -71,6 +73,7 @@ declare i1 @llvm.vp.reduce.or.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 define zeroext i1 @vpreduce_or_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_v2i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -87,6 +90,7 @@ declare i1 @llvm.vp.reduce.xor.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 define zeroext i1 @vpreduce_xor_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_v2i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -119,6 +123,7 @@ declare i1 @llvm.vp.reduce.or.v4i1(i1, <4 x i1>, <4 x i1>, i32)
 define zeroext i1 @vpreduce_or_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_v4i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -135,6 +140,7 @@ declare i1 @llvm.vp.reduce.xor.v4i1(i1, <4 x i1>, <4 x i1>, i32)
 define zeroext i1 @vpreduce_xor_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_v4i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -167,6 +173,7 @@ declare i1 @llvm.vp.reduce.or.v8i1(i1, <8 x i1>, <8 x i1>, i32)
 define zeroext i1 @vpreduce_or_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_v8i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -183,6 +190,7 @@ declare i1 @llvm.vp.reduce.xor.v8i1(i1, <8 x i1>, <8 x i1>, i32)
 define zeroext i1 @vpreduce_xor_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_v8i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -231,6 +239,7 @@ declare i1 @llvm.vp.reduce.and.v256i1(i1, <256 x i1>, <256 x i1>, i32)
 define zeroext i1 @vpreduce_and_v256i1(i1 zeroext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_and_v256i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v9
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: li a3, 128
@@ -265,6 +274,7 @@ declare i1 @llvm.vp.reduce.or.v16i1(i1, <16 x i1>, <16 x i1>, i32)
 define zeroext i1 @vpreduce_or_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_or_v16i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
@@ -281,6 +291,7 @@ declare i1 @llvm.vp.reduce.xor.v16i1(i1, <16 x i1>, <16 x i1>, i32)
 define zeroext i1 @vpreduce_xor_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_v16i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
@@ -297,6 +308,7 @@ declare i1 @llvm.vp.reduce.add.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 define zeroext i1 @vpreduce_add_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_v1i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -313,6 +325,7 @@ declare i1 @llvm.vp.reduce.add.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 define zeroext i1 @vpreduce_add_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_v2i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -329,6 +342,7 @@ declare i1 @llvm.vp.reduce.add.v4i1(i1, <4 x i1>, <4 x i1>, i32)
 define zeroext i1 @vpreduce_add_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_v4i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -345,6 +359,7 @@ declare i1 @llvm.vp.reduce.add.v8i1(i1, <8 x i1>, <8 x i1>, i32)
 define zeroext i1 @vpreduce_add_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_v8i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -361,6 +376,7 @@ declare i1 @llvm.vp.reduce.add.v16i1(i1, <16 x i1>, <16 x i1>, i32)
 define zeroext i1 @vpreduce_add_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_add_v16i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
@@ -489,6 +505,7 @@ declare i1 @llvm.vp.reduce.smin.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v1i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -505,6 +522,7 @@ declare i1 @llvm.vp.reduce.smin.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v2i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -521,6 +539,7 @@ declare i1 @llvm.vp.reduce.smin.v4i1(i1, <4 x i1>, <4 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v4i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -537,6 +556,7 @@ declare i1 @llvm.vp.reduce.smin.v8i1(i1, <8 x i1>, <8 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v8i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -553,6 +573,7 @@ declare i1 @llvm.vp.reduce.smin.v16i1(i1, <16 x i1>, <16 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v16i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
@@ -569,6 +590,7 @@ declare i1 @llvm.vp.reduce.smin.v32i1(i1, <32 x i1>, <32 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v32i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
@@ -585,6 +607,7 @@ declare i1 @llvm.vp.reduce.smin.v64i1(i1, <64 x i1>, <64 x i1>, i32)
 define zeroext i1 @vpreduce_smin_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_smin_v64i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
@@ -601,6 +624,7 @@ declare i1 @llvm.vp.reduce.umax.v1i1(i1, <1 x i1>, <1 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v1i1(i1 zeroext %s, <1 x i1> %v, <1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v1i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -617,6 +641,7 @@ declare i1 @llvm.vp.reduce.umax.v2i1(i1, <2 x i1>, <2 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v2i1(i1 zeroext %s, <2 x i1> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v2i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -633,6 +658,7 @@ declare i1 @llvm.vp.reduce.umax.v4i1(i1, <4 x i1>, <4 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v4i1(i1 zeroext %s, <4 x i1> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v4i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -649,6 +675,7 @@ declare i1 @llvm.vp.reduce.umax.v8i1(i1, <8 x i1>, <8 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v8i1(i1 zeroext %s, <8 x i1> %v, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v8i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -665,6 +692,7 @@ declare i1 @llvm.vp.reduce.umax.v16i1(i1, <16 x i1>, <16 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v16i1(i1 zeroext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v16i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
@@ -681,6 +709,7 @@ declare i1 @llvm.vp.reduce.umax.v32i1(i1, <32 x i1>, <32 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v32i1(i1 zeroext %s, <32 x i1> %v, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v32i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
@@ -697,6 +726,7 @@ declare i1 @llvm.vp.reduce.umax.v64i1(i1, <64 x i1>, <64 x i1>, i32)
 define zeroext i1 @vpreduce_umax_v64i1(i1 zeroext %s, <64 x i1> %v, <64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_umax_v64i1:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index b8617fda3aa7ec..b45d18d01f67a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -123,6 +123,7 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
 ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
@@ -239,6 +240,7 @@ declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -279,6 +281,7 @@ declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -357,6 +360,7 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
@@ -397,6 +401,7 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
@@ -437,6 +442,7 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -477,6 +483,7 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -517,9 +524,9 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32)
 define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v6, v0
 ; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v7, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index 820a05e3d6042b..0c23a71b9af3f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -194,8 +194,8 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
 ;
 ; ZVFHMIN-LABEL: vp_round_v8f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v9, v0
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v9, v0
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT: lui a1, 307200
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -261,6 +261,7 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_round_v16f16:
 ; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; ZVFH-NEXT: vmv1r.v v10, v0
 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
@@ -280,8 +281,8 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
 ;
 ; ZVFHMIN-LABEL: vp_round_v16f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v10, v0
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v10, v0
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT: lui a1, 307200
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -431,6 +432,7 @@ declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v8f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -475,6 +477,7 @@ declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -561,6 +564,7 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
@@ -605,6 +609,7 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
@@ -649,6 +654,7 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -693,6 +699,7 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -743,6 +750,7 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT: slli a1, a1, 4
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v25, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -750,7 +758,6 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
 ; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v24, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index 8391c7939180a0..eed410343999dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -194,8 +194,8 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_v8f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v9, v0
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v9, v0
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT: lui a1, 307200
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -261,6 +261,7 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundeven_v16f16:
 ; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; ZVFH-NEXT: vmv1r.v v10, v0
 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
@@ -280,8 +281,8 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe
 ;
 ; ZVFHMIN-LABEL: vp_roundeven_v16f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v10, v0
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v10, v0
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT: lui a1, 307200
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -431,6 +432,7 @@ declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v8f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -475,6 +477,7 @@ declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -561,6 +564,7 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
@@ -605,6 +609,7 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
@@ -649,6 +654,7 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -693,6 +699,7 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -743,6 +750,7 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT: slli a1, a1, 4
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v25, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -750,7 +758,6 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
 ; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v24, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index 8c38d244602655..fb3015e23a9495 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -194,8 +194,8 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_v8f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v9, v0
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v9, v0
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
 ; ZVFHMIN-NEXT: lui a1, 307200
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -261,6 +261,7 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)
 define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; ZVFH-LABEL: vp_roundtozero_v16f16:
 ; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; ZVFH-NEXT: vmv1r.v v10, v0
 ; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
 ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
@@ -280,8 +281,8 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer
 ;
 ; ZVFHMIN-LABEL: vp_roundtozero_v16f16:
 ; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v10, v0
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v10, v0
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
 ; ZVFHMIN-NEXT: lui a1, 307200
 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -431,6 +432,7 @@ declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32)
 define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v8f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -475,6 +477,7 @@ declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32)
 define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -561,6 +564,7 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
 define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v4f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
@@ -605,6 +609,7 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
 define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v12, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
@@ -649,6 +654,7 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)
 define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -693,6 +699,7 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
 define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -743,6 +750,7 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
 ; CHECK-NEXT: slli a1, a1, 4
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v25, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -750,7 +758,6 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
 ; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v24, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB26_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index d52c42891fcc3b..11b0edcd77bb6b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -598,6 +598,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
 ; CHECK-NEXT: slli a1, a1, 4
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -648,6 +649,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1>
 define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vx_v256i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
@@ -677,6 +679,7 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z
 define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_eq_vx_swap_v256i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
index 38026bb591f797..0ae55f035cc9bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
@@ -8,8 +8,8 @@
 define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) {
 ; VLA-LABEL: concat_2xv4i32:
 ; VLA: # %bb.0:
-; VLA-NEXT: vmv1r.v v10, v9
 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; VLA-NEXT: vmv1r.v v10, v9
 ; VLA-NEXT: vslideup.vi v8, v10, 4
 ; VLA-NEXT: ret
 ;
@@ -32,9 +32,9 @@ define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
 ;
 ; VLS-LABEL: concat_4xv2i32:
 ; VLS: # %bb.0:
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; VLS-NEXT: vmv1r.v v13, v10
 ; VLS-NEXT: vmv1r.v v12, v8
-; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; VLS-NEXT: vslideup.vi v13, v11, 2
 ; VLS-NEXT: vslideup.vi v12, v9, 2
 ; VLS-NEXT: vmv2r.v v8, v12
@@ -62,9 +62,9 @@ define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x
 ;
 ; VLS-LABEL: concat_8xv1i32:
 ; VLS: # %bb.0:
+; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; VLS-NEXT: vmv1r.v v17, v12
 ; VLS-NEXT: vmv1r.v v16, v8
-; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; VLS-NEXT: vslideup.vi v14, v15, 1
 ; VLS-NEXT: vslideup.vi v17, v13, 1
 ; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -89,8 +89,8 @@ define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x
 define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) {
 ; VLA-LABEL: concat_2xv8i32:
 ; VLA: # %bb.0:
-; VLA-NEXT: vmv2r.v v12, v10
 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; VLA-NEXT: vmv2r.v v12, v10
 ; VLA-NEXT: vslideup.vi v8, v12, 8
 ; VLA-NEXT: ret
 ;
@@ -104,10 +104,10 @@ define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) {
 define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
 ; VLA-LABEL: concat_4xv4i32:
 ; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; VLA-NEXT: vmv1r.v v14, v11
 ; VLA-NEXT: vmv1r.v v12, v10
 ; VLA-NEXT: vmv1r.v v10, v9
-; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; VLA-NEXT: vslideup.vi v12, v14, 4
 ; VLA-NEXT: vslideup.vi v8, v10, 4
 ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -140,11 +140,11 @@ define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
 ;
 ; VLS-LABEL: concat_8xv2i32:
 ; VLS: # %bb.0:
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; VLS-NEXT: vmv1r.v v19, v14
 ; VLS-NEXT: vmv1r.v v18, v12
 ; VLS-NEXT: vmv1r.v v17, v10
 ; VLS-NEXT: vmv1r.v v16, v8
-; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; VLS-NEXT: vslideup.vi v19, v15, 2
 ; VLS-NEXT: vslideup.vi v18, v13, 2
 ; VLS-NEXT: vslideup.vi v17, v11, 2
@@ -164,6 +164,7 @@ define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
 define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) {
 ; VLA-LABEL: concat_2xv16i32:
 ; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; VLA-NEXT: vmv4r.v v16, v12
 ; VLA-NEXT: li a0, 32
 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -180,11 +181,11 @@ define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) {
 define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
 ; VLA-LABEL: concat_4xv8i32:
 ; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; VLA-NEXT: vmv2r.v v20, v14
 ; VLA-NEXT: vmv2r.v v16, v12
 ; VLA-NEXT: vmv2r.v v12, v10
 ; VLA-NEXT: li a0, 32
-; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; VLA-NEXT: vslideup.vi v16, v20, 8
 ; VLA-NEXT: vslideup.vi v8, v12, 8
 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -203,6 +204,7 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) {
 ; VLA-LABEL: concat_8xv4i32:
 ; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; VLA-NEXT: vmv1r.v v18, v15
 ; VLA-NEXT: vmv1r.v v20, v14
 ; VLA-NEXT: vmv1r.v v14, v13
@@ -211,7 +213,6 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 ; VLA-NEXT: vmv1r.v v12, v10
 ; VLA-NEXT: vmv1r.v v10, v9
 ; VLA-NEXT: li a0, 32
-; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; VLA-NEXT: vslideup.vi v20, v18, 4
 ; VLA-NEXT: vslideup.vi v16, v14, 4
 ; VLA-NEXT: vslideup.vi v12, v22, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
index d461fa8378cffc..d560977d25e085 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll
@@ -108,6 +108,7 @@ define <4 x i64> @m2_splat_into_identity(<4 x i64> %v1) vscale_range(2,2) {
 define <4 x i64> @m2_broadcast_i128(<4 x i64> %v1) vscale_range(2,2) {
 ; CHECK-LABEL: m2_broadcast_i128:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: ret
 %res = shufflevector <4 x i64> %v1, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -117,6 +118,7 @@ define <4 x i64> @m2_broadcast_i128(<4 x i64> %v1) vscale_range(2,2) {
 define <8 x i64> @m4_broadcast_i128(<8 x i64> %v1) vscale_range(2,2) {
 ; CHECK-LABEL: m4_broadcast_i128:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vmv1r.v v11, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
index 407535831aedad..f7647ff38c8a08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -966,9 +966,9 @@ define <16 x i8> @reverse_v16i8_2(<8 x i8> %a, <8 x i8> %b) {
 define <32 x i8> @reverse_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: reverse_v32i8_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vid.v v12
 ; CHECK-NEXT: addi a1, a0, -1
 ; CHECK-NEXT: vrsub.vx v12, v12, a1
@@ -1035,9 +1035,9 @@ define <8 x i16> @reverse_v8i16_2(<4 x i16> %a, <4 x i16> %b) {
 define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: reverse_v16i16_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vid.v v9
 ; CHECK-NEXT: srli a1, a0, 1
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1060,9 +1060,9 @@ define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
 define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: reverse_v32i16_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vid.v v10
 ; CHECK-NEXT: lui a1, 16
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1116,9 +1116,9 @@ define <4 x i32> @reverse_v4i32_2(<2 x i32> %a, < 2 x i32> %b) {
 define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: reverse_v8i32_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vid.v v9
 ; CHECK-NEXT: srli a1, a0, 2
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1142,9 +1142,9 @@ define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
 define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: reverse_v16i32_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vid.v v10
 ; CHECK-NEXT: srli a1, a0, 2
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1170,9 +1170,9 @@ define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
 define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: reverse_v32i32_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vid.v v12
 ; CHECK-NEXT: srli a1, a0, 2
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1219,9 +1219,9 @@ define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, < 2 x i64> %b) {
 define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK-LABEL: reverse_v8i64_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vid.v v10
 ; CHECK-NEXT: srli a1, a0, 3
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1289,9 +1289,9 @@ define <8 x half> @reverse_v8f16_2(<4 x half> %a, <4 x half> %b) {
 define <16 x half> @reverse_v16f16_2(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: reverse_v16f16_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vid.v v9
 ; CHECK-NEXT: srli a1, a0, 1
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1361,9 +1361,9 @@ define <4 x float> @reverse_v4f32_2(<2 x float> %a, <2 x float> %b) {
 define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: reverse_v8f32_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vid.v v9
 ; CHECK-NEXT: srli a1, a0, 2
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1387,9 +1387,9 @@ define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
 define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: reverse_v16f32_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vid.v v10
 ; CHECK-NEXT: srli a1, a0, 2
 ; CHECK-NEXT: addi a1, a1, -1
@@ -1430,9 +1430,9 @@ define <4 x double> @reverse_v4f64_2(<2 x double> %a, < 2 x double> %b) {
 define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: reverse_v8f64_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vid.v v10
 ; CHECK-NEXT: srli a1, a0, 3
 ; CHECK-NEXT: addi a1, a1, -1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index c37c3a9ee0ea0c..54185a64202485 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -415,8 +415,8 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
 define <2 x i8> @vslide1up_4xi8_neg_length_changing(<4 x i8> %v, i8 %b) {
 ; CHECK-LABEL: vslide1up_4xi8_neg_length_changing:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
+; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vslideup.vi v9, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 1a08c613ca36a3..e6edbe6afb9a7b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -62,6 +62,7 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
 ; CHECK-NEXT: li a4, 5
 ; CHECK-NEXT: .LBB1_1: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
 ; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 1c2c90478a1f77..81671c21e15b46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -542,6 +542,7 @@ declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr, i32, <
 define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind {
 ; CHECK-LABEL: strided_vpload_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v9, v0
 ; CHECK-NEXT: li a4, 16
 ; CHECK-NEXT: mv a3, a2
@@ -598,6 +599,7 @@ declare <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr, i32,
 define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-RV32-LABEL: strided_load_v33f64:
 ; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV32-NEXT: vmv1r.v v8, v0
 ; CHECK-RV32-NEXT: li a5, 32
 ; CHECK-RV32-NEXT: mv a3, a4
@@ -648,6 +650,7 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ;
 ; CHECK-RV64-LABEL: strided_load_v33f64:
 ; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-RV64-NEXT: vmv1r.v v8, v0
 ; CHECK-RV64-NEXT: li a5, 32
 ; CHECK-RV64-NEXT: mv a4, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index 12893ec55cda76..a91dee1cb245f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -53,9 +53,9 @@ declare <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16>, <128 x i1>, i32)
 define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_v128i7_v128i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vmv8r.v v24, v8
 ; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v12, v0, 8
 ; CHECK-NEXT: mv a2, a0
 ; CHECK-NEXT: bltu a0, a1, .LBB4_2
@@ -231,6 +231,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb
+; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: li a3, 24
@@ -243,7 +244,6 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
 ; CHECK-NEXT: add a2, sp, a2
 ; CHECK-NEXT: addi a2, a2, 16
 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vi v6, v0, 8
 ; CHECK-NEXT: addi a2, a1, 512
 ; CHECK-NEXT: addi a3, a1, 640
@@ -541,9 +541,9 @@ declare <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64>, <32 x i1>, i32)
 define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_v32i32_v32i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv8r.v v24, v8
 ; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v12, v0, 2
 ; CHECK-NEXT: mv a1, a0
 ; CHECK-NEXT: bltu a0, a2, .LBB17_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index db03dc3d5ab1e2..488f364780395b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -80,14 +80,8 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
 ; RV32-SLOW-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; RV32-SLOW-NEXT: vmv.x.s a0, v0
 ; RV32-SLOW-NEXT: andi a1, a0, 1
-; RV32-SLOW-NEXT: bnez a1, .LBB4_3
-; RV32-SLOW-NEXT: # %bb.1: # %else
-; RV32-SLOW-NEXT: andi a0, a0, 2
-; RV32-SLOW-NEXT: bnez a0, .LBB4_4
-; RV32-SLOW-NEXT: .LBB4_2: # %else2
-; RV32-SLOW-NEXT: vmv1r.v v8, v9
-; RV32-SLOW-NEXT: ret
-; RV32-SLOW-NEXT: .LBB4_3: # %cond.load
+; RV32-SLOW-NEXT: beqz a1, .LBB4_2
+; RV32-SLOW-NEXT: # %bb.1: # %cond.load
 ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
 ; RV32-SLOW-NEXT: vmv.x.s a1, v8
 ; RV32-SLOW-NEXT: lbu a2, 1(a1)
@@ -96,9 +90,10 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
 ; RV32-SLOW-NEXT: or a1, a2, a1
 ; RV32-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma
 ; RV32-SLOW-NEXT: vmv.s.x v9, a1
+; RV32-SLOW-NEXT: .LBB4_2: # %else
 ; RV32-SLOW-NEXT: andi a0, a0, 2
-; RV32-SLOW-NEXT: beqz a0, .LBB4_2
-; RV32-SLOW-NEXT: .LBB4_4: # %cond.load1
+; RV32-SLOW-NEXT: beqz a0, .LBB4_4
+; RV32-SLOW-NEXT: # %bb.3: # %cond.load1
 ; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
 ; RV32-SLOW-NEXT: vmv.x.s a0, v8
@@ -109,6 +104,8 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
 ; RV32-SLOW-NEXT: vmv.s.x v8, a0
 ; RV32-SLOW-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; RV32-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV32-SLOW-NEXT: .LBB4_4: # %else2
+; RV32-SLOW-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV32-SLOW-NEXT: vmv1r.v v8, v9
 ; RV32-SLOW-NEXT: ret
 ;
@@ -117,14 +114,8 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
 ; RV64-SLOW-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; RV64-SLOW-NEXT: vmv.x.s a0, v0
 ; RV64-SLOW-NEXT: andi a1, a0, 1
-; RV64-SLOW-NEXT: bnez a1, .LBB4_3
-; RV64-SLOW-NEXT: # %bb.1: # %else
-; RV64-SLOW-NEXT: andi a0, a0, 2
-; RV64-SLOW-NEXT: bnez a0, .LBB4_4
-; RV64-SLOW-NEXT: .LBB4_2: # %else2
-; RV64-SLOW-NEXT: vmv1r.v v8, v9
-; RV64-SLOW-NEXT: ret
-; RV64-SLOW-NEXT: .LBB4_3: # %cond.load
+; RV64-SLOW-NEXT: beqz a1, .LBB4_2
+; RV64-SLOW-NEXT: # %bb.1: # %cond.load
 ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; RV64-SLOW-NEXT: vmv.x.s a1, v8
 ; RV64-SLOW-NEXT: lbu a2, 1(a1)
@@ -133,9 +124,10 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
 ; RV64-SLOW-NEXT: or a1, a2, a1
 ; RV64-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma
 ; RV64-SLOW-NEXT: vmv.s.x v9, a1
+; RV64-SLOW-NEXT: .LBB4_2: # %else
 ; RV64-SLOW-NEXT: andi a0, a0, 2
-; RV64-SLOW-NEXT: beqz a0, .LBB4_2
-; RV64-SLOW-NEXT: .LBB4_4: # %cond.load1
+; RV64-SLOW-NEXT: beqz a0, .LBB4_4
+; RV64-SLOW-NEXT: # %bb.3: # %cond.load1
 ; RV64-SLOW-NEXT: vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
 ; RV64-SLOW-NEXT: vmv.x.s a0, v8
@@ -146,6 +138,8 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
 ; RV64-SLOW-NEXT: vmv.s.x v8, a0
 ; RV64-SLOW-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; RV64-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV64-SLOW-NEXT: .LBB4_4: # %else2
+; RV64-SLOW-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-SLOW-NEXT: vmv1r.v v8, v9
 ; RV64-SLOW-NEXT: ret
 ;
@@ -174,23 +168,18 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
 ; RV32-SLOW-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; RV32-SLOW-NEXT: vmv.x.s a0, v0
 ; RV32-SLOW-NEXT: andi a1, a0, 1
-; RV32-SLOW-NEXT: bnez a1, .LBB5_3
-; RV32-SLOW-NEXT: # %bb.1: # %else
-; RV32-SLOW-NEXT: andi a0, a0, 2
-; RV32-SLOW-NEXT: bnez a0, .LBB5_4
-; RV32-SLOW-NEXT: .LBB5_2: # %else2
-; RV32-SLOW-NEXT: vmv1r.v v8, v9
-; RV32-SLOW-NEXT: ret
-; RV32-SLOW-NEXT: .LBB5_3: # %cond.load
+; RV32-SLOW-NEXT: beqz a1, .LBB5_2
+; RV32-SLOW-NEXT: # %bb.1: # %cond.load
 ; RV32-SLOW-NEXT: vsetivli zero, 2, e32, m1, tu, ma
 ; RV32-SLOW-NEXT: vmv.x.s a1, v8
 ; RV32-SLOW-NEXT: lw a2, 0(a1)
 ; RV32-SLOW-NEXT: lw a1, 4(a1)
 ; RV32-SLOW-NEXT: vslide1down.vx v9, v9, a2
 ; RV32-SLOW-NEXT: vslide1down.vx v9, v9, a1
+; RV32-SLOW-NEXT: .LBB5_2: # %else
 ; RV32-SLOW-NEXT: andi a0, a0, 2
-; RV32-SLOW-NEXT: beqz a0, .LBB5_2
-; RV32-SLOW-NEXT: .LBB5_4: # %cond.load1
+; RV32-SLOW-NEXT: beqz a0, .LBB5_4
+; RV32-SLOW-NEXT: # %bb.3: # %cond.load1
 ; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
 ; RV32-SLOW-NEXT: vmv.x.s a0, v8
@@ -201,6 +190,8 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
 ; RV32-SLOW-NEXT: vslide1down.vx v8, v8, a0
 ; RV32-SLOW-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV32-SLOW-NEXT: .LBB5_4: # %else2
+; RV32-SLOW-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV32-SLOW-NEXT: vmv1r.v v8, v9
 ; RV32-SLOW-NEXT: ret
 ;
@@ -209,14 +200,8 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
 ; RV64-SLOW-NEXT: vsetivli zero, 1, e8, m1, ta, ma
 ; RV64-SLOW-NEXT: vmv.x.s a0, v0
 ; RV64-SLOW-NEXT: andi a1, a0, 1
-; RV64-SLOW-NEXT: bnez a1, .LBB5_3
-; RV64-SLOW-NEXT: # %bb.1: # %else
-; RV64-SLOW-NEXT: andi a0, a0, 2
-; RV64-SLOW-NEXT: bnez a0, .LBB5_4
-; RV64-SLOW-NEXT: .LBB5_2: # %else2
-; RV64-SLOW-NEXT: vmv1r.v v8, v9
-; RV64-SLOW-NEXT: ret
-; RV64-SLOW-NEXT: .LBB5_3: # %cond.load
+; RV64-SLOW-NEXT: beqz a1, .LBB5_2
+; RV64-SLOW-NEXT: # %bb.1: # %cond.load
 ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, tu, ma
 ; RV64-SLOW-NEXT: vmv.x.s a1, v8
 ; RV64-SLOW-NEXT: lwu a2, 4(a1)
@@ -224,9 +209,10 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
 ; RV64-SLOW-NEXT: slli a2, a2, 32
 ; RV64-SLOW-NEXT: or a1, a2, a1
 ; RV64-SLOW-NEXT: vmv.s.x v9, a1
+; RV64-SLOW-NEXT: .LBB5_2: # %else
 ; RV64-SLOW-NEXT: andi a0, a0, 2
-; RV64-SLOW-NEXT: beqz a0, .LBB5_2
-; RV64-SLOW-NEXT: .LBB5_4: # %cond.load1
+; RV64-SLOW-NEXT: beqz a0, .LBB5_4
+; RV64-SLOW-NEXT: # %bb.3: # %cond.load1
 ; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
 ; RV64-SLOW-NEXT: vmv.x.s a0, v8
@@ -236,6 +222,8 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
 ; RV64-SLOW-NEXT: or a0, a1, a0
 ; RV64-SLOW-NEXT: vmv.s.x v8, a0
 ; RV64-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV64-SLOW-NEXT: .LBB5_4: # %else2
+; RV64-SLOW-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-SLOW-NEXT: vmv1r.v v8, v9
 ; RV64-SLOW-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
index 5be1a771eb2799..9e9989590e4eb5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -363,6 +363,7 @@ declare <256 x i8> @llvm.vp.add.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
 define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vadd_vi_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
index ac48542ca9ebb3..5ec18aec2b9a60 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
@@ -267,6 +267,7 @@ declare <256 x i8> @llvm.vp.smax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
 define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmax_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
index 794eef6ed40b21..d47e17077bf902 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
@@ -266,6 +266,7 @@ declare <256 x i8> @llvm.vp.umax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
 define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmaxu_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
index 34011f6bd8acd3..23d292a5fd77d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
@@ -267,6 +267,7 @@ declare <256 x i8> @llvm.vp.smin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
 define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmin_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
index 79e72b7d9cac9d..b400015856f84a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
@@ -266,6 +266,7 @@ declare <256 x i8> @llvm.vp.umin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
 define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vminu_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index 24e75cde2ce915..df9ff0fc39a7e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -2617,8 +2617,8 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32>
 define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_baseidx_v32f64:
 ; RV32: # %bb.0:
-; RV32-NEXT: vmv1r.v v7, v0
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmv1r.v v7, v0
 ; RV32-NEXT: vnsrl.wi v24, v16, 0
 ; RV32-NEXT: vnsrl.wi v16, v8, 0
 ; RV32-NEXT: li a2, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index 71f497e4c7be48..a971b469df0a2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -394,6 +394,7 @@ declare <33 x double> @llvm.vp.load.v33f64.p0(ptr, <33 x i1>, i32)
 define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpload_v33f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v8, v0
 ; CHECK-NEXT: li a4, 32
 ; CHECK-NEXT: mv a3, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index a11c2b6bca12ec..a53d33e6120d55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1181,9 +1181,9 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT: addi a1, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vmv8r.v v16, v8
 ; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a1)
 ; CHECK-NEXT: vle64.v v8, (a0)
 ; CHECK-NEXT: li a1, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
index 888fc79f0122da..ede197c11eb916 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
@@ -372,6 +372,7 @@ declare <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>,
 define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vsadd_vi_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
index e1d57ae1e67414..13dc0702c16aa5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
@@ -368,6 +368,7 @@ declare <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>,
 define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vsaddu_vi_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index 1d8af4c46cc078..07b08e2518ea44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -163,6 +163,7 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT: addi a2, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v7, v0
 ; CHECK-NEXT: li a2, 128
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 8fad3db55f9bcd..8a199770c163dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -384,6 +384,7 @@ declare <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vi_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: addi a3, a1, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index ca35aa6c4a94c1..37c5e397569277 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -379,6 +379,7 @@ declare <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vi_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: addi a3, a1, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index e6dfe5e78cdb4b..ff12aaac3a983d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -117,8 +117,8 @@ declare @llvm.vp.floor.nxv4bf16(, @vp_floor_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -169,8 +169,8 @@ declare @llvm.vp.floor.nxv8bf16(, @vp_floor_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -221,8 +221,8 @@ declare @llvm.vp.floor.nxv16bf16(, define @vp_floor_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -279,9 +279,9 @@ define @vp_floor_nxv32bf16( %va, @vp_floor_nxv4f16( %va, @llvm.vp.floor.nxv8f16(, @vp_floor_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) @@ -668,8 +669,8 @@ define @vp_floor_nxv8f16( %va, @llvm.vp.floor.nxv16f16(, @vp_floor_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) @@ -754,8 +756,8 @@ define @vp_floor_nxv16f16( %va, @llvm.vp.floor.nxv32f16(, @vp_floor_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv32f16: ; ZVFH: # %bb.0: +; 
ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) @@ -846,9 +849,9 @@ define @vp_floor_nxv32f16( %va, @llvm.vp.floor.nxv4f32(, @vp_floor_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -1112,6 +1116,7 @@ declare @llvm.vp.floor.nxv8f32(, @vp_floor_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -1156,6 +1161,7 @@ declare @llvm.vp.floor.nxv16f32(, @vp_floor_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1242,6 +1248,7 @@ declare @llvm.vp.floor.nxv2f64(, @vp_floor_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI36_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) @@ -1286,6 +1293,7 @@ declare @llvm.vp.floor.nxv4f64(, @vp_floor_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI38_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) @@ -1330,6 +1338,7 @@ declare @llvm.vp.floor.nxv7f64(, @vp_floor_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI40_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) @@ -1374,6 +1383,7 @@ declare @llvm.vp.floor.nxv8f64(, @vp_floor_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI42_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) @@ -1425,13 +1435,13 @@ define @vp_floor_nxv16f64( %va, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32bf16_vv( %a, @vfmax_nxv32f16_vv( %a, @vfmax_nxv32f16_vv( %a, @llvm.vp.maximum.nxv1bf16(, < define @vfmax_vv_nxv1bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -66,8 +66,8 @@ declare @llvm.vp.maximum.nxv2bf16(, < define @vfmax_vv_nxv2bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -113,8 +113,8 @@ declare @llvm.vp.maximum.nxv4bf16(, < define @vfmax_vv_nxv4bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: 
vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t @@ -162,8 +162,8 @@ declare @llvm.vp.maximum.nxv8bf16(, < define @vfmax_vv_nxv8bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t @@ -217,8 +217,8 @@ define @vfmax_vv_nxv16bf16( %va, @llvm.vp.maximum.nxv1f16(, @vfmax_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv1f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -582,8 +583,8 @@ define @vfmax_vv_nxv1f16( %va, @llvm.vp.maximum.nxv2f16(, @vfmax_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv2f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -652,8 +654,8 @@ define @vfmax_vv_nxv2f16( %va, @llvm.vp.maximum.nxv4f16(, @vfmax_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv4f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -722,8 +725,8 @@ define @vfmax_vv_nxv4f16( %va, @llvm.vp.maximum.nxv8f16(, @vfmax_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -796,8 +800,8 @@ define @vfmax_vv_nxv8f16( %va, @llvm.vp.maximum.nxv16f16(, @vfmax_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -876,8 +881,8 @@ define @vfmax_vv_nxv16f16( %va, @vfmax_vv_nxv32f16( %va, @llvm.vp.maximum.nxv1f32(, @vfmax_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -1313,6 +1320,7 @@ declare @llvm.vp.maximum.nxv2f32(, @vfmax_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -1346,6 +1354,7 @@ declare @llvm.vp.maximum.nxv4f32(, @vfmax_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -1381,6 +1390,7 @@ declare @llvm.vp.maximum.nxv8f32(, @vfmax_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f32: ; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -1416,6 +1426,7 @@ declare @llvm.vp.maximum.nxv1f64(, @vfmax_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -1449,6 +1460,7 @@ declare @llvm.vp.maximum.nxv2f64(, @vfmax_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -1484,6 +1496,7 @@ declare @llvm.vp.maximum.nxv4f64(, @vfmax_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -1525,6 +1538,7 @@ define @vfmax_vv_nxv8f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32bf16_vv( %a, @vfmin_nxv32f16_vv( %a, @vfmin_nxv32f16_vv( %a, @llvm.vp.minimum.nxv1bf16(, < define @vfmin_vv_nxv1bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -66,8 +66,8 @@ declare @llvm.vp.minimum.nxv2bf16(, < define @vfmin_vv_nxv2bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t @@ -113,8 +113,8 @@ declare @llvm.vp.minimum.nxv4bf16(, < define @vfmin_vv_nxv4bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t @@ -162,8 +162,8 @@ declare @llvm.vp.minimum.nxv8bf16(, < define @vfmin_vv_nxv8bf16( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t @@ -217,8 +217,8 @@ define @vfmin_vv_nxv16bf16( %va, @llvm.vp.minimum.nxv1f16(, @vfmin_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv1f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -582,8 +583,8 @@ define @vfmin_vv_nxv1f16( %va, @llvm.vp.minimum.nxv2f16(, @vfmin_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv2f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: 
vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -652,8 +654,8 @@ define @vfmin_vv_nxv2f16( %va, @llvm.vp.minimum.nxv4f16(, @vfmin_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv4f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -722,8 +725,8 @@ define @vfmin_vv_nxv4f16( %va, @llvm.vp.minimum.nxv8f16(, @vfmin_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -796,8 +800,8 @@ define @vfmin_vv_nxv8f16( %va, @llvm.vp.minimum.nxv16f16(, @vfmin_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -876,8 +881,8 @@ define @vfmin_vv_nxv16f16( %va, @vfmin_vv_nxv32f16( %va, @llvm.vp.minimum.nxv1f32(, @vfmin_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -1313,6 +1320,7 @@ declare @llvm.vp.minimum.nxv2f32(, @vfmin_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -1346,6 +1354,7 @@ declare @llvm.vp.minimum.nxv4f32(, @vfmin_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -1381,6 +1390,7 @@ declare @llvm.vp.minimum.nxv8f32(, @vfmin_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -1416,6 +1426,7 @@ declare @llvm.vp.minimum.nxv1f64(, @vfmin_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -1449,6 +1460,7 @@ declare @llvm.vp.minimum.nxv2f64(, @vfmin_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t @@ -1484,6 +1496,7 @@ declare @llvm.vp.minimum.nxv4f64(, @vfmin_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t @@ -1525,6 +1538,7 @@ define 
@vfmin_vv_nxv8f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, This Inner Loop Header: Depth=1 ; RV32-NEXT: th.lrb a0, a1, a0, 0 -; RV32-NEXT: vmv1r.v v9, v8 ; RV32-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV32-NEXT: vmv1r.v v9, v8 ; RV32-NEXT: vmv.s.x v9, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmseq.vi v9, v9, 0 @@ -45,8 +45,8 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: th.lrb a0, a1, a0, 0 -; RV64-NEXT: vmv1r.v v9, v8 ; RV64-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64-NEXT: vmv1r.v v9, v8 ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmseq.vi v9, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index c7e3c8cb519829..2078670f330e28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -703,6 +703,7 @@ define @fshl_v16i32( %a, @fshl_v7i64( %a, ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: li a0, 63 @@ -953,6 +955,7 @@ define @fshl_v8i64( %a, ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: li a0, 63 @@ -988,6 +991,7 @@ define @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @test_specify_reg_mf2( %in, %in2) nounwind { ; CHECK-LABEL: test_specify_reg_mf2: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v2, v9 ; CHECK-NEXT: vmv1r.v v1, v8 ; CHECK-NEXT: #APP ; CHECK-NEXT: vadd.vv v0, v1, v2 ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: ret entry: @@ -380,11 +382,13 @@ entry: define @test_specify_reg_m1( %in, %in2) nounwind { ; CHECK-LABEL: test_specify_reg_m1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v2, v9 ; CHECK-NEXT: vmv1r.v v1, v8 ; CHECK-NEXT: #APP ; CHECK-NEXT: vadd.vv v0, v1, v2 ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: ret entry: @@ -395,11 +399,13 @@ entry: define @test_specify_reg_m2( %in, %in2) nounwind { ; CHECK-LABEL: test_specify_reg_m2: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v4, v10 ; CHECK-NEXT: vmv2r.v v2, v8 ; CHECK-NEXT: #APP ; CHECK-NEXT: vadd.vv v0, v2, v4 ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v8, v0 ; CHECK-NEXT: ret entry: @@ -410,6 +416,7 @@ entry: define @test_specify_reg_mask( %in, %in2) nounwind { ; CHECK-LABEL: test_specify_reg_mask: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v2, v8 ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: #APP diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index 
8925a9e0cee321..86a5a4878c6569 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -5,6 +5,7 @@ define @insert_nxv8i32_nxv4i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv4i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv4i32.nxv8i32( %vec, %subvec, i64 0) @@ -14,6 +15,7 @@ define @insert_nxv8i32_nxv4i32_0( %vec, @insert_nxv8i32_nxv4i32_4( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv4i32_4: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v10, v12 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv4i32.nxv8i32( %vec, %subvec, i64 4) @@ -23,6 +25,7 @@ define @insert_nxv8i32_nxv4i32_4( %vec, @insert_nxv8i32_nxv2i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv2i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv8i32( %vec, %subvec, i64 0) @@ -32,6 +35,7 @@ define @insert_nxv8i32_nxv2i32_0( %vec, @insert_nxv8i32_nxv2i32_2( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv2i32_2: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv8i32( %vec, %subvec, i64 2) @@ -41,6 +45,7 @@ define @insert_nxv8i32_nxv2i32_2( %vec, @insert_nxv8i32_nxv2i32_4( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv2i32_4: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v12 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv8i32( %vec, %subvec, i64 4) @@ -50,6 +55,7 @@ define @insert_nxv8i32_nxv2i32_4( %vec, @insert_nxv8i32_nxv2i32_6( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv2i32_6: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v12 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv8i32( %vec, %subvec, i64 6) @@ -86,6 +92,7 @@ define @insert_nxv1i8_nxv4i8_3( %vec, @insert_nxv16i32_nxv8i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv8i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv8i32.nxv16i32( %vec, %subvec, i64 0) @@ -95,6 +102,7 @@ define @insert_nxv16i32_nxv8i32_0( %vec, define @insert_nxv16i32_nxv8i32_8( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv8i32_8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v12, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv8i32.nxv16i32( %vec, %subvec, i64 8) @@ -104,6 +112,7 @@ define @insert_nxv16i32_nxv8i32_8( %vec, define @insert_nxv16i32_nxv4i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv4i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv4i32.nxv16i32( %vec, %subvec, i64 0) @@ -113,6 +122,7 @@ define @insert_nxv16i32_nxv4i32_0( %vec, define @insert_nxv16i32_nxv4i32_4( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv4i32_4: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v10, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv4i32.nxv16i32( %vec, %subvec, i64 4) @@ -122,6 +132,7 @@ define @insert_nxv16i32_nxv4i32_4( %vec, define @insert_nxv16i32_nxv4i32_8( %vec, %subvec) { ; CHECK-LABEL: 
insert_nxv16i32_nxv4i32_8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v12, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv4i32.nxv16i32( %vec, %subvec, i64 8) @@ -131,6 +142,7 @@ define @insert_nxv16i32_nxv4i32_8( %vec, define @insert_nxv16i32_nxv4i32_12( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv4i32_12: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v14, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv4i32.nxv16i32( %vec, %subvec, i64 12) @@ -140,6 +152,7 @@ define @insert_nxv16i32_nxv4i32_12( %vec, define @insert_nxv16i32_nxv2i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 0) @@ -149,6 +162,7 @@ define @insert_nxv16i32_nxv2i32_0( %vec, define @insert_nxv16i32_nxv2i32_2( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_2: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 2) @@ -158,6 +172,7 @@ define @insert_nxv16i32_nxv2i32_2( %vec, define @insert_nxv16i32_nxv2i32_4( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_4: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 4) @@ -167,6 +182,7 @@ define @insert_nxv16i32_nxv2i32_4( %vec, define @insert_nxv16i32_nxv2i32_6( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_6: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 6) @@ -176,6 +192,7 @@ define @insert_nxv16i32_nxv2i32_6( %vec, define @insert_nxv16i32_nxv2i32_8( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 8) @@ -185,6 +202,7 @@ define @insert_nxv16i32_nxv2i32_8( %vec, define @insert_nxv16i32_nxv2i32_10( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_10: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 10) @@ -194,6 +212,7 @@ define @insert_nxv16i32_nxv2i32_10( %vec, define @insert_nxv16i32_nxv2i32_12( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_12: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 12) @@ -203,6 +222,7 @@ define @insert_nxv16i32_nxv2i32_12( %vec, define @insert_nxv16i32_nxv2i32_14( %vec, %subvec) { ; CHECK-LABEL: insert_nxv16i32_nxv2i32_14: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v15, v16 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2i32.nxv16i32( %vec, %subvec, i64 14) @@ -512,6 +532,7 @@ define @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 { define @insert_insert_combine(<2 x i32> %subvec) { ; CHECK-LABEL: insert_insert_combine: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: 
ret %inner = call @llvm.vector.insert.nxv4i32.v2i32( undef, <2 x i32> %subvec, i64 0) @@ -524,6 +545,7 @@ define @insert_insert_combine(<2 x i32> %subvec) { define @insert_insert_combine2( %subvec) { ; CHECK-LABEL: insert_insert_combine2: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: ret %inner = call @llvm.vector.insert.nxv2i32.nxv4i32( undef, %subvec, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll index ffb9bf76fb4fab..166dba6a565243 100644 --- a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll @@ -55,11 +55,11 @@ declare @llvm.vp.llrint.nxv8i64.nxv8f32(, define @llrint_nxv16i64_nxv16f32( %x, %m, i32 zeroext %evl) { ; CHECK-LABEL: llrint_nxv16i64_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll index 9991bbc9725ba3..21045b69a8b5dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll @@ -117,11 +117,11 @@ define @lrint_nxv16f32( %x, @llvm.riscv.viota.mask.nxv1i8( define @intrinsic_viota_mask_m_nxv1i8_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -1312,6 +1313,7 @@ declare @llvm.riscv.vmsbf.mask.nxv1i1( define @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -1443,6 +1445,7 @@ declare @llvm.riscv.vmsbf.mask.nxv64i1( define @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index a3eddbcc2baed4..727908b67c6dd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -221,11 +221,13 @@ define @mgather_truemask_nxv4i8( %ptrs, @mgather_falsemask_nxv4i8( %ptrs, %passthru) { ; RV32-LABEL: mgather_falsemask_nxv4i8: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_falsemask_nxv4i8: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v8, v12 ; RV64-NEXT: ret %v = call @llvm.masked.gather.nxv4i8.nxv4p0( %ptrs, i32 1, zeroinitializer, %passthru) @@ -442,11 +444,13 @@ define @mgather_truemask_nxv4i16( %ptrs, @mgather_falsemask_nxv4i16( %ptrs, %passthru) { ; RV32-LABEL: mgather_falsemask_nxv4i16: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: 
mgather_falsemask_nxv4i16: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v8, v12 ; RV64-NEXT: ret %v = call @llvm.masked.gather.nxv4i16.nxv4p0( %ptrs, i32 2, zeroinitializer, %passthru) @@ -686,11 +690,13 @@ define @mgather_truemask_nxv4i32( %ptrs, @mgather_falsemask_nxv4i32( %ptrs, %passthru) { ; RV32-LABEL: mgather_falsemask_nxv4i32: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv2r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_falsemask_nxv4i32: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv2r.v v8, v12 ; RV64-NEXT: ret %v = call @llvm.masked.gather.nxv4i32.nxv4p0( %ptrs, i32 4, zeroinitializer, %passthru) @@ -949,6 +955,7 @@ define @mgather_truemask_nxv4i64( %ptrs, @mgather_falsemask_nxv4i64( %ptrs, %passthru) { ; CHECK-LABEL: mgather_falsemask_nxv4i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %v = call @llvm.masked.gather.nxv4i64.nxv4p0( %ptrs, i32 8, zeroinitializer, %passthru) @@ -1232,12 +1239,12 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: addi a3, sp, 16 ; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmv8r.v v16, v8 ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: vl8re64.v v8, (a1) ; RV64-NEXT: srli a1, a0, 3 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v7, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t @@ -1348,11 +1355,13 @@ define @mgather_truemask_nxv4bf16( %ptrs define @mgather_falsemask_nxv4bf16( %ptrs, %passthru) { ; RV32-LABEL: mgather_falsemask_nxv4bf16: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_falsemask_nxv4bf16: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v8, v12 ; RV64-NEXT: ret %v = call @llvm.masked.gather.nxv4bf16.nxv4p0( %ptrs, i32 2, zeroinitializer, %passthru) @@ -1549,11 +1558,13 @@ define @mgather_truemask_nxv4f16( %ptrs, < define @mgather_falsemask_nxv4f16( %ptrs, %passthru) { ; RV32-LABEL: mgather_falsemask_nxv4f16: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_falsemask_nxv4f16: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v8, v12 ; RV64-NEXT: ret %v = call @llvm.masked.gather.nxv4f16.nxv4p0( %ptrs, i32 2, zeroinitializer, %passthru) @@ -1749,11 +1760,13 @@ define @mgather_truemask_nxv4f32( %ptrs, define @mgather_falsemask_nxv4f32( %ptrs, %passthru) { ; RV32-LABEL: mgather_falsemask_nxv4f32: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv2r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_falsemask_nxv4f32: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv2r.v v8, v12 ; RV64-NEXT: ret %v = call @llvm.masked.gather.nxv4f32.nxv4p0( %ptrs, i32 4, zeroinitializer, %passthru) @@ -2012,6 +2025,7 @@ define @mgather_truemask_nxv4f64( %ptrs, define @mgather_falsemask_nxv4f64( %ptrs, %passthru) { ; CHECK-LABEL: mgather_falsemask_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v8, v12 ; 
CHECK-NEXT: ret %v = call @llvm.masked.gather.nxv4f64.nxv4p0( %ptrs, i32 8, zeroinitializer, %passthru) @@ -2317,8 +2331,8 @@ define @mgather_baseidx_nxv32i8(ptr %base, ; ; RV64-LABEL: mgather_baseidx_nxv32i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v16, v0 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v16, v0 ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 72c251ce985cbf..77a1f508d22184 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -2009,11 +2009,11 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64( %val0, @reverse_nxv32i8( %a) { define @reverse_nxv64i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv8r.v v16, v8 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma @@ -1188,10 +1188,10 @@ define @reverse_nxv64i8( %a) { ; ; RV32-BITS-256-LABEL: reverse_nxv64i8: ; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; RV32-BITS-256-NEXT: vmv8r.v v16, v8 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: addi a0, a0, -1 -; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV32-BITS-256-NEXT: vid.v v8 ; RV32-BITS-256-NEXT: vrsub.vx v24, v8, a0 ; RV32-BITS-256-NEXT: vrgather.vv v15, v16, v24 @@ -1206,10 +1206,10 @@ define @reverse_nxv64i8( %a) { ; ; RV32-BITS-512-LABEL: reverse_nxv64i8: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; RV32-BITS-512-NEXT: vmv8r.v v16, v8 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: addi a0, a0, -1 -; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV32-BITS-512-NEXT: vid.v v8 ; RV32-BITS-512-NEXT: vrsub.vx v24, v8, a0 ; RV32-BITS-512-NEXT: vrgather.vv v15, v16, v24 @@ -1224,10 +1224,10 @@ define @reverse_nxv64i8( %a) { ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv8r.v v16, v8 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma @@ -1243,10 +1243,10 @@ define @reverse_nxv64i8( %a) { ; ; RV64-BITS-256-LABEL: reverse_nxv64i8: ; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; RV64-BITS-256-NEXT: vmv8r.v v16, v8 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: addi a0, a0, -1 -; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; RV64-BITS-256-NEXT: vid.v v8 ; RV64-BITS-256-NEXT: vrsub.vx v24, v8, a0 ; RV64-BITS-256-NEXT: vrgather.vv v15, v16, v24 @@ -1261,10 +1261,10 @@ define @reverse_nxv64i8( %a) { ; ; RV64-BITS-512-LABEL: reverse_nxv64i8: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; RV64-BITS-512-NEXT: vmv8r.v v16, v8 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: addi a0, a0, -1 -; RV64-BITS-512-NEXT: vsetvli 
a1, zero, e8, m1, ta, ma ; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: vrsub.vx v24, v8, a0 ; RV64-BITS-512-NEXT: vrgather.vv v15, v16, v24 @@ -1367,11 +1367,11 @@ define @reverse_nxv16i16( %a) { define @reverse_nxv32i16( %a) { ; CHECK-LABEL: reverse_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 @@ -1458,11 +1458,11 @@ define @reverse_nxv8i32( %a) { define @reverse_nxv16i32( %a) { ; CHECK-LABEL: reverse_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 @@ -1533,11 +1533,11 @@ define @reverse_nxv4i64( %a) { define @reverse_nxv8i64( %a) { ; CHECK-LABEL: reverse_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 @@ -1644,11 +1644,11 @@ define @reverse_nxv16bf16( %a) { define @reverse_nxv32bf16( %a) { ; CHECK-LABEL: reverse_nxv32bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 @@ -1751,11 +1751,11 @@ define @reverse_nxv16f16( %a) { define @reverse_nxv32f16( %a) { ; CHECK-LABEL: reverse_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 @@ -1842,11 +1842,11 @@ define @reverse_nxv8f32( %a) { define @reverse_nxv16f32( %a) { ; CHECK-LABEL: reverse_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 @@ -1917,11 +1917,11 @@ define @reverse_nxv4f64( %a) { define @reverse_nxv8f64( %a) { ; CHECK-LABEL: reverse_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vrsub.vx v24, v8, a0 ; CHECK-NEXT: vrgather.vv v15, v16, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 94fce80ad3b8e4..096d9864f940a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -117,8 +117,8 @@ declare @llvm.vp.nearbyint.nxv4bf16(, define @vp_nearbyint_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -169,8 +169,8 @@ declare @llvm.vp.nearbyint.nxv8bf16(, define @vp_nearbyint_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -221,8 +221,8 @@ declare @llvm.vp.nearbyint.nxv16bf16( @vp_nearbyint_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -273,9 +273,9 @@ declare @llvm.vp.nearbyint.nxv32bf16( @vp_nearbyint_nxv32bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv32bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12 ; CHECK-NEXT: lui a3, 307200 ; CHECK-NEXT: slli a1, a2, 1 @@ -566,8 +566,8 @@ define @vp_nearbyint_nxv4f16( %va, @llvm.vp.nearbyint.nxv8f16(, @vp_nearbyint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) @@ -652,8 +653,8 @@ define @vp_nearbyint_nxv8f16( %va, @llvm.vp.nearbyint.nxv16f16(, < define @vp_nearbyint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) @@ -738,8 +740,8 @@ define @vp_nearbyint_nxv16f16( %va, @llvm.vp.nearbyint.nxv32f16(, < define @vp_nearbyint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) @@ -824,9 +827,9 @@ define @vp_nearbyint_nxv32f16( %va, @llvm.vp.nearbyint.nxv4f32(, @vp_nearbyint_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -1080,6 +1084,7 @@ declare @llvm.vp.nearbyint.nxv8f32(, @vp_nearbyint_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -1124,6 +1129,7 @@ declare @llvm.vp.nearbyint.nxv16f32(, define @vp_nearbyint_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f32: ; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1210,6 +1216,7 @@ declare @llvm.vp.nearbyint.nxv2f64(, define @vp_nearbyint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI36_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) @@ -1254,6 +1261,7 @@ declare @llvm.vp.nearbyint.nxv4f64(, define @vp_nearbyint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI38_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) @@ -1298,6 +1306,7 @@ declare @llvm.vp.nearbyint.nxv7f64(, define @vp_nearbyint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI40_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1) @@ -1342,6 +1351,7 @@ declare @llvm.vp.nearbyint.nxv8f64(, define @vp_nearbyint_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI42_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1) @@ -1387,13 +1397,13 @@ declare @llvm.vp.nearbyint.nxv16f64( @vp_nearbyint_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll index 37c67b9ff2f6af..dd7debd3ab0466 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll @@ -23,10 +23,10 @@ define i1 @foo( %x, i64 %y) { ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: addi a2, sp, 64 ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a1, a2, a1 @@ -53,8 +53,8 @@ define i1 @foo( %x, i64 %y) { define i8 @bar( %x, i64 %y) { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v1, v8 ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v1, v8 ; CHECK-NEXT: vslidedown.vx v8, v0, a0 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index 2a69dd31118bd8..f40f828e834bec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -109,8 +109,8 @@ declare @llvm.vp.rint.nxv4bf16(, @vp_rint_nxv4bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a1, 
307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma @@ -157,8 +157,8 @@ declare @llvm.vp.rint.nxv8bf16(, @vp_rint_nxv8bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma @@ -205,8 +205,8 @@ declare @llvm.vp.rint.nxv16bf16(, < define @vp_rint_nxv16bf16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16bf16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -259,9 +259,9 @@ define @vp_rint_nxv32bf16( %va, @vp_rint_nxv4f16( %va, @llvm.vp.rint.nxv8f16(, @vp_rint_nxv8f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv8f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI18_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1) @@ -613,8 +614,8 @@ define @vp_rint_nxv8f16( %va, @llvm.vp.rint.nxv16f16(, @vp_rint_nxv16f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv16f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI20_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1) @@ -691,8 +693,8 @@ define @vp_rint_nxv16f16( %va, @llvm.vp.rint.nxv32f16(, @vp_rint_nxv32f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_rint_nxv32f16: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 ; ZVFH-NEXT: lui a1, %hi(.LCPI22_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1) @@ -775,9 +778,9 @@ define @vp_rint_nxv32f16( %va, @llvm.vp.rint.nxv4f32(, @vp_rint_nxv4f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t @@ -1018,6 +1022,7 @@ declare @llvm.vp.rint.nxv8f32(, @vp_rint_nxv8f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t @@ -1058,6 +1063,7 @@ declare @llvm.vp.rint.nxv16f32(, @vp_rint_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -1136,6 +1142,7 @@ declare @llvm.vp.rint.nxv2f64(, @vp_rint_nxv2f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv2f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI36_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1) @@ -1176,6 +1183,7 @@ declare @llvm.vp.rint.nxv4f64(, @vp_rint_nxv4f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI38_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1) @@ -1216,6 +1224,7 @@ declare @llvm.vp.rint.nxv7f64(, @vp_rint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv7f64: ; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
@@ -1256,6 +1265,7 @@ declare @llvm.vp.rint.nxv8f64(,
define @vp_rint_nxv8f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
@@ -1303,13 +1313,13 @@ define @vp_rint_nxv16f64( %va,
@llvm.vp.round.nxv4bf16(,
@vp_round_nxv4bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -169,8 +169,8 @@ declare @llvm.vp.round.nxv8bf16(,
@vp_round_nxv8bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -221,8 +221,8 @@ declare @llvm.vp.round.nxv16bf16(,
define @vp_round_nxv16bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -279,9 +279,9 @@ define @vp_round_nxv32bf16( %va,
@vp_round_nxv4f16( %va,
@llvm.vp.round.nxv8f16(,
@vp_round_nxv8f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
@@ -668,8 +669,8 @@ define @vp_round_nxv8f16( %va,
@llvm.vp.round.nxv16f16(,
@vp_round_nxv16f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
@@ -754,8 +756,8 @@ define @vp_round_nxv16f16( %va,
@llvm.vp.round.nxv32f16(,
@vp_round_nxv32f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
@@ -846,9 +849,9 @@ define @vp_round_nxv32f16( %va,
@llvm.vp.round.nxv4f32(,
@vp_round_nxv4f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -1112,6 +1116,7 @@ declare @llvm.vp.round.nxv8f32(,
@vp_round_nxv8f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -1156,6 +1161,7 @@ declare @llvm.vp.round.nxv16f32(,
@vp_round_nxv16f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1242,6 +1248,7 @@ declare @llvm.vp.round.nxv2f64(,
@vp_round_nxv2f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
@@ -1286,6 +1293,7 @@ declare @llvm.vp.round.nxv4f64(,
@vp_round_nxv4f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
@@ -1330,6 +1338,7 @@ declare @llvm.vp.round.nxv7f64(,
@vp_round_nxv7f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
@@ -1374,6 +1383,7 @@ declare @llvm.vp.round.nxv8f64(,
@vp_round_nxv8f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
@@ -1425,13 +1435,13 @@ define @vp_round_nxv16f64( %va,
@llvm.vp.roundeven.nxv4bf16(,
define @vp_roundeven_nxv4bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -169,8 +169,8 @@ declare @llvm.vp.roundeven.nxv8bf16(,
define @vp_roundeven_nxv8bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -221,8 +221,8 @@ declare @llvm.vp.roundeven.nxv16bf16(
define @vp_roundeven_nxv16bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -279,9 +279,9 @@ define @vp_roundeven_nxv32bf16( %va
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: lui a3, 307200
; CHECK-NEXT: slli a1, a2, 1
@@ -582,8 +582,8 @@ define @vp_roundeven_nxv4f16( %va,
@llvm.vp.roundeven.nxv8f16(,
@vp_roundeven_nxv8f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
@@ -668,8 +669,8 @@ define @vp_roundeven_nxv8f16( %va,
@llvm.vp.roundeven.nxv16f16(, <
define @vp_roundeven_nxv16f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
@@ -754,8 +756,8 @@ define @vp_roundeven_nxv16f16( %va,
@llvm.vp.roundeven.nxv32f16(, <
define @vp_roundeven_nxv32f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
@@ -846,9 +849,9 @@ define @vp_roundeven_nxv32f16( %va,
@llvm.vp.roundeven.nxv4f32(,
@vp_roundeven_nxv4f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -1112,6 +1116,7 @@ declare @llvm.vp.roundeven.nxv8f32(,
@vp_roundeven_nxv8f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -1156,6 +1161,7 @@ declare @llvm.vp.roundeven.nxv16f32(,
define @vp_roundeven_nxv16f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1242,6 +1248,7 @@ declare @llvm.vp.roundeven.nxv2f64(,
define @vp_roundeven_nxv2f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
@@ -1286,6 +1293,7 @@ declare @llvm.vp.roundeven.nxv4f64(,
define @vp_roundeven_nxv4f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
@@ -1330,6 +1338,7 @@ declare @llvm.vp.roundeven.nxv7f64(,
define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
@@ -1374,6 +1383,7 @@ declare @llvm.vp.roundeven.nxv8f64(,
define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
@@ -1425,13 +1435,13 @@ define @vp_roundeven_nxv16f64( %va,
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: lui a2, %hi(.LCPI44_0)
; CHECK-NEXT: srli a3, a1, 3
; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a3
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index 96c821a76ae84e..13fdc11145001e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -117,8 +117,8 @@ declare @llvm.vp.roundtozero.nxv4bf16(
define @vp_roundtozero_nxv4bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
@@ -169,8 +169,8 @@ declare @llvm.vp.roundtozero.nxv8bf16(
define @vp_roundtozero_nxv8bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
@@ -221,8 +221,8 @@ declare @llvm.vp.roundtozero.nxv16bf16(
define @vp_roundtozero_nxv16bf16( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -279,9 +279,9 @@ define @vp_roundtozero_nxv32bf16( %
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: lui a3, 307200
; CHECK-NEXT: slli a1, a2, 1
@@ -582,8 +582,8 @@ define @vp_roundtozero_nxv4f16( %va,
@llvm.vp.roundtozero.nxv8f16(,
@vp_roundtozero_nxv8f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
@@ -668,8 +669,8 @@ define @vp_roundtozero_nxv8f16( %va,
@llvm.vp.roundtozero.nxv16f16(,
define @vp_roundtozero_nxv16f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
@@ -754,8 +756,8 @@ define @vp_roundtozero_nxv16f16( %va, <
;
; ZVFHMIN-LABEL: vp_roundtozero_nxv16f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vmv1r.v v12, v0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv1r.v v12, v0
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -821,6 +823,7 @@ declare @llvm.vp.roundtozero.nxv32f16(,
define @vp_roundtozero_nxv32f16( %va, %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
@@ -846,9 +849,9 @@ define @vp_roundtozero_nxv32f16( %va, <
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v7, v0
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: lui a3, 307200
; ZVFHMIN-NEXT: slli a1, a2, 1
@@ -1068,6 +1071,7 @@ declare @llvm.vp.roundtozero.nxv4f32(,
define @vp_roundtozero_nxv4f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv4f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -1112,6 +1116,7 @@ declare @llvm.vp.roundtozero.nxv8f32(,
define @vp_roundtozero_nxv8f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv8f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -1156,6 +1161,7 @@ declare @llvm.vp.roundtozero.nxv16f32(
define @vp_roundtozero_nxv16f32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1242,6 +1248,7 @@ declare @llvm.vp.roundtozero.nxv2f64(
define @vp_roundtozero_nxv2f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
@@ -1286,6 +1293,7 @@ declare @llvm.vp.roundtozero.nxv4f64(
define @vp_roundtozero_nxv4f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
@@ -1330,6 +1338,7 @@ declare @llvm.vp.roundtozero.nxv7f64(
define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
@@ -1374,6 +1383,7 @@ declare @llvm.vp.roundtozero.nxv8f64(
define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
@@ -1425,13 +1435,13 @@ define @vp_roundtozero_nxv16f64( %v
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: lui a2, %hi(.LCPI44_0)
; CHECK-NEXT: srli a3, a1, 3
; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a3
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index aef160049106b9..30180e4c41218e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -17,6 +17,7 @@ define @foo( %a,
@spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -90,6 +91,7 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -166,6 +168,7 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
@@ -246,6 +249,7 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
@@ -326,6 +330,7 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index c7c44fb0e12158..a7a134db93b0eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -20,6 +20,7 @@ define @foo( %a,
@spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -90,6 +91,7 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8_v9
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; SPILL-O0-NEXT: vmv1r.v v8, v9
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
@@ -166,6 +168,7 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
@@ -246,6 +249,7 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8m4_v12m4
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; SPILL-O0-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, tu, ma
; SPILL-O0-NEXT: vmv4r.v v8, v12
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
@@ -326,6 +330,7 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-NEXT: # implicit-def: $v8m2_v10m2_v12m2
; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, tu, ma
; SPILL-O0-NEXT: vmv2r.v v8, v10
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
index b27ba14e85c839..2e3f7c9bc2a6ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
@@ -47,6 +47,7 @@ define @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5,
; CHECK-NEXT: vs8r.v v8, (t1)
; CHECK-NEXT: sd t1, 0(sp)
; CHECK-NEXT: sd t0, 8(sp)
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: call bar
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 23ebfade6f6b0f..ebafaed09929a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -941,6 +941,7 @@ declare @llvm.riscv.vredsum.nxv2i32.nxv2i32(
define @vredsum( %passthru, %x, %y, %m, i64 %vl) {
; CHECK-LABEL: vredsum:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v11, v8
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vredsum.vs v11, v9, v10
@@ -965,6 +966,7 @@ define @vfredusum( %passthru,
@vfredusum_allones_mask( %passth
define @unfoldable_vredsum_allones_mask_diff_vl( %passthru, %x, %y) {
; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v11, v8
; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma
+; CHECK-NEXT: vmv1r.v v11, v8
; CHECK-NEXT: vredsum.vs v11, v9, v10
; CHECK-NEXT: vsetivli zero, 1, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v11
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 6c11e9413525e0..70b53841bff4c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1473,6 +1473,7 @@ define @fcmp_oeq_vv_nxv64bf16( %va,
@fcmp_oeq_vv_nxv64bf16( %va,
@fcmp_oeq_vv_nxv64f16( %va,
@fcmp_oeq_vv_nxv64f16( %va,
@fcmp_oeq_vv_nxv64f16( %va,
@fcmp_oeq_vv_nxv64f16( %va,
@icmp_eq_vv_nxv128i8( %va,
@icmp_eq_vv_nxv128i8( %va,
@icmp_eq_vv_nxv128i8( %va,
@icmp_eq_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
@@ -1173,8 +1173,8 @@ define @icmp_eq_vx_nxv128i8( %va, i8 %b,
define @icmp_eq_vx_swap_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_swap_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: vlm.v v0, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
@@ -2244,6 +2244,7 @@ define @icmp_eq_vv_nxv32i32( %va,
@icmp_eq_vv_nxv32i32( %va,
@icmp_eq_vv_nxv32i32( %va,
@icmp_eq_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_nxv32i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: srli a2, a3, 2
; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a4, a1, a3
; CHECK-NEXT: sltu a5, a1, a4
@@ -2332,11 +2332,11 @@ define @icmp_eq_vx_nxv32i32( %va, i32 %b,
define @icmp_eq_vx_swap_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
; CHECK-LABEL: icmp_eq_vx_swap_nxv32i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: srli a2, a3, 2
; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a4, a1, a3
; CHECK-NEXT: sltu a5, a1, a4
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index c91b02e8f15e47..bff6737abf9a39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -4865,10 +4865,10 @@ declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i
define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_icmp:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: .LBB102_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -4906,10 +4906,10 @@ declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1
define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_fcmp:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: .LBB103_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index f8315de324e42b..e1cd2dade3124f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -663,6 +663,7 @@ declare @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(
define @strided_load_nxv16f64(ptr %ptr, i64 %stride, %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_nxv16f64:
; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v9, v0
; CHECK-RV32-NEXT: csrr a4, vlenb
; CHECK-RV32-NEXT: sub a2, a3, a4
@@ -688,6 +689,7 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride,
@llvm.experimental.vp.strided.load.nxv16f64.p0.i6
define @strided_load_nxv17f64(ptr %ptr, i64 %stride, %mask, i32 zeroext %evl, ptr %hi_ptr) {
; CHECK-RV32-LABEL: strided_load_nxv17f64:
; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: csrr a2, vlenb
; CHECK-RV32-NEXT: slli a7, a2, 1
@@ -812,6 +815,7 @@ define @strided_load_nxv17f64(ptr %ptr, i64 %stride, %v, ptr %ptr, i32 sig
; CHECK-NEXT: slli a4, a4, 3
; CHECK-NEXT: sub sp, sp, a4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: addi a4, sp, 16
; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
index ab13c78da05e87..c9f9a79733003e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll
@@ -158,8 +158,8 @@ declare @llvm.riscv.vrgatherei16.vv.nxv8i8.i64( %v, ptr noalias %q) {
; CHECK-LABEL: repeat_shuffle:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index ebd550013ec78f..fee6799e992f31 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -565,8 +565,8 @@ declare @llvm.vp.add.nxv128i8(,
define @vadd_vi_nxv128i8( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: vlm.v v0, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
@@ -1343,11 +1343,11 @@ declare @llvm.vp.add.nxv32i32(,
define @vadd_vi_nxv32i32( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vadd_vi_nxv32i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
@@ -1399,11 +1399,11 @@ declare i32 @llvm.vscale.i32()
define @vadd_vi_nxv32i32_evl_nx8( %va, %m) {
; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a2, a0, 2
; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll
index e59a9174b03d94..dbe0780ce9bb89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll
@@ -43,6 +43,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv1i1(
define iXLen @intrinsic_vcpop_mask_m_nxv1i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv1i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
@@ -97,6 +98,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv2i1(
define iXLen @intrinsic_vcpop_mask_m_nxv2i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv2i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
@@ -137,6 +139,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv4i1(
define iXLen @intrinsic_vcpop_mask_m_nxv4i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv4i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
@@ -177,6 +180,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv8i1(
define iXLen @intrinsic_vcpop_mask_m_nxv8i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv8i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
@@ -217,6 +221,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv16i1(
define iXLen @intrinsic_vcpop_mask_m_nxv16i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv16i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
@@ -257,6 +262,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv32i1(
define iXLen @intrinsic_vcpop_mask_m_nxv32i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv32i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
@@ -297,6 +303,7 @@ declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv64i1(
define iXLen @intrinsic_vcpop_mask_m_nxv64i1( %0, %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv64i1:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 41cf886c3ab75d..731ad81b687758 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -7,8 +7,8 @@
define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
; CHECK-LABEL: vector_deinterleave_v16i1_v32i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index f20a90a4223139..8509583d372391 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -131,8 +131,8 @@ ret {, } %retval
define {, } @vector_deinterleave_nxv64i8_nxv128i8( %vec) {
; CHECK-LABEL: vector_deinterleave_nxv64i8_nxv128i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vnsrl.wi v8, v24, 0
; CHECK-NEXT: vnsrl.wi v0, v24, 8
; CHECK-NEXT: vnsrl.wi v12, v16, 0
@@ -146,8 +146,8 @@ ret {, } %retval
define {, } @vector_deinterleave_nxv32i16_nxv64i16( %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32i16_nxv64i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vnsrl.wi v8, v24, 0
; CHECK-NEXT: vnsrl.wi v0, v24, 16
; CHECK-NEXT: vnsrl.wi v12, v16, 0
@@ -161,9 +161,9 @@ ret {, } %retval
define {, } @vector_deinterleave_nxv16i32_nxvv32i32( %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16i32_nxvv32i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wx v20, v24, a0
; CHECK-NEXT: vnsrl.wx v16, v8, a0
; CHECK-NEXT: vnsrl.wi v0, v8, 0
@@ -189,8 +189,8 @@ define {, } @vector_deinterleave_nxv8i64_nxv
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v0, v8, v8
; CHECK-NEXT: vrgather.vv v8, v24, v0
@@ -387,8 +387,8 @@ declare {, } @llvm.vector.deinterleave
define {, } @vector_deinterleave_nxv32bf16_nxv64bf16( %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vnsrl.wi v8, v24, 0
; CHECK-NEXT: vnsrl.wi v0, v24, 16
; CHECK-NEXT: vnsrl.wi v12, v16, 0
@@ -402,8 +402,8 @@ ret {, } %retval
define {, } @vector_deinterleave_nxv32f16_nxv64f16( %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vnsrl.wi v8, v24, 0
; CHECK-NEXT: vnsrl.wi v0, v24, 16
; CHECK-NEXT: vnsrl.wi v12, v16, 0
@@ -417,9 +417,9 @@ ret {, } %retval
define {, } @vector_deinterleave_nxv16f32_nxv32f32( %vec) {
; CHECK-LABEL: vector_deinterleave_nxv16f32_nxv32f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wx v20, v24, a0
; CHECK-NEXT: vnsrl.wx v16, v8, a0
; CHECK-NEXT: vnsrl.wi v0, v8, 0
@@ -445,8 +445,8 @@ define {, } @vector_deinterleave_nxv8f
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v0, v8, v8
; CHECK-NEXT: vrgather.vv v8, v24, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 7b0ac01918b9bd..08aa02c7e869a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -91,10 +91,10 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vector_interleave_v4i64_v2i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: lui a0, 12304
; CHECK-NEXT: addi a0, a0, 512
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
@@ -106,10 +106,10 @@ define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
;
; ZVBB-LABEL: vector_interleave_v4i64_v2i64:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vmv1r.v v10, v9
; ZVBB-NEXT: lui a0, 12304
; ZVBB-NEXT: addi a0, a0, 512
-; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vslideup.vi v8, v10, 2
; ZVBB-NEXT: vmv.s.x v10, a0
; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
@@ -239,10 +239,10 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: vector_interleave_v4f64_v2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: lui a0, 12304
; CHECK-NEXT: addi a0, a0, 512
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
@@ -254,10 +254,10 @@ define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double>
;
; ZVBB-LABEL: vector_interleave_v4f64_v2f64:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vmv1r.v v10, v9
; ZVBB-NEXT: lui a0, 12304
; ZVBB-NEXT: addi a0, a0, 512
-; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vslideup.vi v8, v10, 2
; ZVBB-NEXT: vmv.s.x v10, a0
; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index bc203e215d8786..9b78f31d399d9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -9,9 +9,9 @@
define void @vector_interleave_store_nxv32i1_nxv16i1( %a, %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv32i1_nxv16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: csrr a2, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 26e9afcb1d109b..864acb320d8fe1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -11,9 +11,9 @@
define @vector_interleave_nxv32i1_nxv16i1( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv32i1_nxv16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: csrr a1, vlenb
@@ -32,9 +32,9 @@ define @vector_interleave_nxv32i1_nxv16i1(
;
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, mu
; ZVBB-NEXT: vmv1r.v v9, v0
; ZVBB-NEXT: vmv1r.v v0, v8
-; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, mu
; ZVBB-NEXT: vmv.v.i v10, 0
; ZVBB-NEXT: li a0, 1
; ZVBB-NEXT: csrr a1, vlenb
@@ -160,9 +160,9 @@ declare @llvm.vector.interleave2.nxv4i64(,
define @vector_interleave_nxv128i1_nxv64i1( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv128i1_nxv64i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vmerge.vim v16, v24, 1, v0
@@ -203,8 +203,8 @@ define @vector_interleave_nxv128i1_nxv64i1(
define @vector_interleave_nxv128i8_nxv64i8( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv128i8_nxv64i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vwaddu.vv v8, v24, v16
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwaddu.vv v0, v28, v20
@@ -215,8 +215,8 @@ define @vector_interleave_nxv128i8_nxv64i8(
@vector_interleave_nxv128i8_nxv64i8(
@vector_interleave_nxv64i16_nxv32i16( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv64i16_nxv32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vwaddu.vv v8, v24, v16
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwaddu.vv v0, v28, v20
@@ -242,8 +242,8 @@ define @vector_interleave_nxv64i16_nxv32i16(
@vector_interleave_nxv64i16_nxv32i16(
@vector_interleave_nxv32i32_nxv16i32( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv32i32_nxv16i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vwaddu.vv v8, v24, v16
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwaddu.vv v0, v28, v20
@@ -269,9 +269,9 @@ define @vector_interleave_nxv32i32_nxv16i32(
@llvm.vector.interleave2.nxv4f64(
@vector_interleave_nxv64bf16_nxv32bf16( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vwaddu.vv v8, v24, v16
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwaddu.vv v0, v28, v20
@@ -587,8 +587,8 @@ define @vector_interleave_nxv64bf16_nxv32bf16(
@vector_interleave_nxv64bf16_nxv32bf16(
@vector_interleave_nxv64f16_nxv32f16( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vwaddu.vv v8, v24, v16
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwaddu.vv v0, v28, v20
@@ -614,8 +614,8 @@ define @vector_interleave_nxv64f16_nxv32f16(
@vector_interleave_nxv64f16_nxv32f16(
@vector_interleave_nxv32f32_nxv16f32( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv32f32_nxv16f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vwaddu.vv v8, v24, v16
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwaddu.vv v0, v28, v20
@@ -641,9 +641,9 @@ define @vector_interleave_nxv32f32_nxv16f32(
@vadd_vv_passthru( %0, %1, i32 %2) nounwind {
; CHECK-LABEL: vadd_vv_passthru:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vv v10, v8, v9
@@ -152,6 +153,7 @@ entry:
define @vadd_vv_passthru_negative( %0, %1, i32 %2) nounwind {
; CHECK-LABEL: vadd_vv_passthru_negative:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT: vadd.vv v10, v8, v9
@@ -183,6 +185,7 @@ entry:
define @vadd_vv_mask( %0, %1, i32 %2, %m) nounwind {
; CHECK-LABEL: vadd_vv_mask:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t
@@ -218,6 +221,7 @@ entry:
define @vadd_vv_mask_negative( %0, %1, i32 %2, %m, %m2) nounwind {
; CHECK-LABEL: vadd_vv_mask_negative:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v11, v8
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vadd.vv v11, v8, v9, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
index 6a72043ca7e8e6..90d798b167cfc5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -11,9 +11,9 @@ declare @llvm.vector.splice.nxv1i1(,
define @splice_nxv1i1_offset_negone( %a, %b) #0 {
; CHECK-LABEL: splice_nxv1i1_offset_negone:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -33,9 +33,9 @@ define @splice_nxv1i1_offset_negone( %a,
@splice_nxv1i1_offset_max( %a, %b) #0 {
; CHECK-LABEL: splice_nxv1i1_offset_max:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -59,9 +59,9 @@ declare @llvm.vector.splice.nxv2i1(,
define @splice_nxv2i1_offset_negone( %a, %b) #0 {
; CHECK-LABEL: splice_nxv2i1_offset_negone:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -81,9 +81,9 @@ define @splice_nxv2i1_offset_negone( %a,
@splice_nxv2i1_offset_max( %a, %b) #0 {
; CHECK-LABEL: splice_nxv2i1_offset_max:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -107,9 +107,9 @@ declare @llvm.vector.splice.nxv4i1(,
define @splice_nxv4i1_offset_negone( %a, %b) #0 {
; CHECK-LABEL: splice_nxv4i1_offset_negone:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -129,9 +129,9 @@ define @splice_nxv4i1_offset_negone( %a,
@splice_nxv4i1_offset_max( %a, %b) #0 {
; CHECK-LABEL: splice_nxv4i1_offset_max:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -155,9 +155,9 @@ declare @llvm.vector.splice.nxv8i1(,
define @splice_nxv8i1_offset_negone( %a, %b) #0 {
; CHECK-LABEL: splice_nxv8i1_offset_negone:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
@@ -176,9 +176,9 @@ define @splice_nxv8i1_offset_negone( %a,
@splice_nxv8i1_offset_max( %a, %b) #0 {
; CHECK-LABEL: splice_nxv8i1_offset_max:
; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 @@ -201,9 +201,9 @@ declare @llvm.vector.splice.nxv16i1(, @splice_nxv16i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 @@ -223,9 +223,9 @@ define @splice_nxv16i1_offset_negone( %a, < define @splice_nxv16i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 @@ -249,9 +249,9 @@ declare @llvm.vector.splice.nxv32i1(, @splice_nxv32i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv32i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 @@ -296,9 +296,9 @@ declare @llvm.vector.splice.nxv64i1(, @splice_nxv64i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv64i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 2c92a5da8eecb7..8f9f9c4256c8f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -462,11 +462,11 @@ declare @llvm.vp.fabs.nxv16f64(, @vfabs_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 1953cfd2a0169f..87bc9f27d6dc96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -411,11 +411,11 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vf_nxv32bf16( %va, bf ; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: addi a3, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a1 @@ -604,10 +604,10 @@ define @vfadd_vf_nxv32bf16_unmasked( @vfadd_vv_nxv32f16( %va, @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 
0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 @@ -1416,10 +1416,10 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v7 ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index ccd286b7ee5fd3..061af454aa8bab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -373,11 +373,11 @@ define @vfdiv_vv_nxv32bf16( %va, @vfdiv_vf_nxv32bf16( %va, bf ; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: addi a3, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a1 @@ -566,10 +566,10 @@ define @vfdiv_vf_nxv32bf16_unmasked( @vfdiv_vv_nxv32f16( %va, @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 @@ -1328,10 +1328,10 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v16, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v7 ; ZVFHMIN-NEXT: addi a3, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll index eafd605c6110eb..906d0642143cb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll @@ -43,6 +43,7 @@ declare iXLen 
@llvm.riscv.vfirst.mask.iXLen.nxv1i1( define iXLen @intrinsic_vfirst_mask_m_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv1i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma @@ -97,6 +98,7 @@ declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv2i1( define iXLen @intrinsic_vfirst_mask_m_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv2i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -137,6 +139,7 @@ declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv4i1( define iXLen @intrinsic_vfirst_mask_m_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv4i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -177,6 +180,7 @@ declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv8i1( define iXLen @intrinsic_vfirst_mask_m_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv8i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -217,6 +221,7 @@ declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv16i1( define iXLen @intrinsic_vfirst_mask_m_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv16i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -257,6 +262,7 @@ declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv32i1( define iXLen @intrinsic_vfirst_mask_m_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv32i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma @@ -297,6 +303,7 @@ declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv64i1( define iXLen @intrinsic_vfirst_mask_m_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv64i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index fd518d9be786de..c969e34411fc90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -628,6 +628,7 @@ define @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfnmadd_vv_nxv16f16( %va, @vfnmadd_vf_nxv16f16_neg_splat_commute( @vfnmsub_vv_nxv16f16( %va, @vfnmsub_vf_nxv16f16_neg_splat( ; ; ZVFHMIN-LABEL: vfnmsub_vf_nxv16f16_neg_splat: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv4r.v v4, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 @@ -8712,6 +8714,7 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vf_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: 
add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v7 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: csrr a4, vlenb @@ -10001,11 +10004,11 @@ define @vfnmadd_vv_nxv32f16_unmasked_commuted( @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vv_nxv32f16_unmasked_commuted( @vfnmsub_vf_nxv32f16( %va, half ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a3, 8 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 @@ -12075,11 +12078,11 @@ define @vfnmsub_vf_nxv32f16_commute( %v ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: fmv.x.h a2, fa0 ; ZVFHMIN-NEXT: lui a3, 8 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v16, a2 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll index 1d471ab2404b17..a4f3a7d3a09a87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll @@ -227,6 +227,7 @@ define @vfmadd_vv_nxv32bf16( %va, < ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv8r.v v0, v16 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -237,7 +238,6 @@ define @vfmadd_vv_nxv32bf16( %va, < ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 @@ -314,6 +314,7 @@ define @vfmadd_vf_nxv32bf16( %va, < ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -321,7 +322,6 @@ define @vfmadd_vf_nxv32bf16( %va, < ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: fmv.x.h a0, fa0 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -664,6 
@@ -664,6 +664,7 @@ define @vfmadd_vv_nxv32f16( %va, @vfmadd_vv_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmadd_vv_nxv32bf16( %va, <
 ; ZVFH-NEXT: slli a1, a1, 5
 ; ZVFH-NEXT: sub sp, sp, a1
 ; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFH-NEXT: vmv8r.v v0, v16
 ; ZVFH-NEXT: addi a1, sp, 16
 ; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -236,7 +237,6 @@ define @vfmadd_vv_nxv32bf16( %va, <
 ; ZVFH-NEXT: add a0, sp, a0
 ; ZVFH-NEXT: addi a0, a0, 16
 ; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
 ; ZVFH-NEXT: csrr a0, vlenb
 ; ZVFH-NEXT: slli a0, a0, 3
@@ -316,6 +316,7 @@ define @vfmadd_vv_nxv32bf16( %va, <
 ; ZVFHMIN-NEXT: slli a1, a1, 5
 ; ZVFHMIN-NEXT: sub sp, sp, a1
 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv8r.v v0, v16
 ; ZVFHMIN-NEXT: addi a1, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -326,7 +327,6 @@ define @vfmadd_vv_nxv32bf16( %va, <
 ; ZVFHMIN-NEXT: add a0, sp, a0
 ; ZVFHMIN-NEXT: addi a0, a0, 16
 ; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
 ; ZVFHMIN-NEXT: csrr a0, vlenb
 ; ZVFHMIN-NEXT: li a1, 24
@@ -402,12 +402,12 @@ define @vfmadd_vf_nxv32bf16( %va, <
 ; ZVFH-NEXT: slli a0, a0, 5
 ; ZVFH-NEXT: sub sp, sp, a0
 ; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFH-NEXT: vmv8r.v v0, v16
 ; ZVFH-NEXT: addi a0, sp, 16
 ; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFH-NEXT: vmv8r.v v16, v8
 ; ZVFH-NEXT: fmv.x.h a0, fa0
-; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
 ; ZVFH-NEXT: csrr a1, vlenb
 ; ZVFH-NEXT: slli a1, a1, 4
@@ -498,12 +498,12 @@ define @vfmadd_vf_nxv32bf16( %va, <
 ; ZVFHMIN-NEXT: slli a0, a0, 5
 ; ZVFHMIN-NEXT: sub sp, sp, a0
 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv8r.v v0, v16
 ; ZVFHMIN-NEXT: addi a0, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: fmv.x.h a0, fa0
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
 ; ZVFHMIN-NEXT: csrr a1, vlenb
 ; ZVFHMIN-NEXT: slli a1, a1, 4
@@ -875,6 +875,7 @@ define @vfmadd_vv_nxv32f16( %va, @vfmadd_vv_nxv32f16( %va, @vfmadd_vf_nxv32f16( %va, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vf_nxv32f16( %va, half %b
 ; ZVFHMIN-NEXT: add a1, a2, a1
 ; ZVFHMIN-NEXT: sub sp, sp, a1
 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: csrr a2, vlenb
 ; ZVFHMIN-NEXT: addi a3, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
 ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v16, a1
@@ -706,10 +706,10 @@ define @vfmul_vf_nxv32f16_unmasked( %va
 ; ZVFHMIN-NEXT: slli a1, a1, 4
 ; ZVFHMIN-NEXT: sub sp, sp, a1
 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT: vmset.m v7
 ; ZVFHMIN-NEXT: addi a3, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll
index d1702268f829fa..901f3cd63fa9e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll
@@ -1112,6 +1112,7 @@ define @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @llvm.vp.fneg.nxv16f64(,
 define @vfneg_vv_nxv16f64( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfneg_vv_nxv16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
 ; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sltu a2, a0, a3
 ; CHECK-NEXT: addi a2, a2, -1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index 3705e73fda492e..b8ec285b5c34e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -329,6 +329,7 @@ define @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @vfnmsub_vf_nxv32f16( %va, @llvm.vp.fpext.nxv32f32.nxv32f16( @vfpext_nxv32f16_nxv32f32( %a, %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vfpext_nxv32f16_nxv32f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
 ; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: sltu a3, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
index cf195c7c0935e4..d990c74c67d5a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll
@@ -508,11 +508,11 @@ declare @llvm.vp.fptosi.nxv32i16.nxv32f32(
 define @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
 ; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: sltu a3, a0, a2
@@ -538,11 +538,11 @@ declare @llvm.vp.fptosi.nxv32i32.nxv32f32(
 define @vfptosi_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
 ; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: sltu a3, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
index 952d28604b86c6..3b24a648d97f5f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll
@@ -508,11 +508,11 @@ declare @llvm.vp.fptoui.nxv32i16.nxv32f32(
 define @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
 ; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: sltu a3, a0, a2
@@ -538,11 +538,11 @@ declare @llvm.vp.fptoui.nxv32i32.nxv32f32(
 define @vfptoui_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
 ; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: sltu a3, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
index 874813f0575953..63156e1399293f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -102,13 +102,13 @@ define @vfptrunc_nxv16f32_nxv16f64(
 ; CHECK-NEXT: slli a1, a1, 3
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v0
 ; CHECK-NEXT: addi a1, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
 ; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sltu a2, a0, a3
 ; CHECK-NEXT: addi a2, a2, -1
@@ -147,6 +147,7 @@ define @vfptrunc_nxv32f32_nxv32f64(
 ; CHECK-NEXT: slli a1, a1, 4
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v0
 ; CHECK-NEXT: addi a1, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -160,7 +161,6 @@ define @vfptrunc_nxv32f32_nxv32f64(
 ; CHECK-NEXT: srli a5, a1, 2
 ; CHECK-NEXT: slli a6, a1, 3
 ; CHECK-NEXT: slli a4, a1, 1
-; CHECK-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v16, v0, a5
 ; CHECK-NEXT: add a6, a0, a6
 ; CHECK-NEXT: sub a5, a2, a4
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
index 8edcf23988c7fb..8e57be1e0697c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
@@ -167,12 +167,12 @@ declare @llvm.vp.sqrt.nxv32bf16(, <
 define @vfsqrt_vv_nxv32bf16( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfsqrt_vv_nxv32bf16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: slli a1, a2, 1
 ; CHECK-NEXT: srli a2, a2, 2
 ; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sltu a2, a0, a3
 ; CHECK-NEXT: addi a2, a2, -1
@@ -452,12 +452,12 @@ define @vfsqrt_vv_nxv32f16( %va, @llvm.vp.sqrt.nxv16f64(,
 define @vfsqrt_vv_nxv16f64( %va, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfsqrt_vv_nxv16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
 ; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a2
 ; CHECK-NEXT: sltu a2, a0, a3
 ; CHECK-NEXT: addi a2, a2, -1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index 25a80e66c4a527..d034f65479a159 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -373,11 +373,11 @@ define @vfsub_vv_nxv32bf16( %va, @vfsub_vf_nxv32bf16( %va, bf
 ; CHECK-NEXT: add a1, a2, a1
 ; CHECK-NEXT: sub sp, sp, a1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; CHECK-NEXT: vmv8r.v v16, v8
 ; CHECK-NEXT: fmv.x.h a1, fa0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: addi a3, sp, 16
 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a3, zero, e16, m4, ta, ma
 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v16, a1
@@ -566,10 +566,10 @@ define @vfsub_vf_nxv32bf16_unmasked( @vfsub_vv_nxv32f16( %va, @vfsub_vf_nxv32f16( %va, half %b
 ; ZVFHMIN-NEXT: add a1, a2, a1
 ; ZVFHMIN-NEXT: sub sp, sp, a1
 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: csrr a2, vlenb
 ; ZVFHMIN-NEXT: addi a3, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
 ; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma
 ; ZVFHMIN-NEXT: vmv.v.x v16, a1
@@ -1328,10 +1328,10 @@ define @vfsub_vf_nxv32f16_unmasked( %va
 ; ZVFHMIN-NEXT: slli a1, a1, 4
 ; ZVFHMIN-NEXT: sub sp, sp, a1
 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT: vmv8r.v v16, v8
 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0
 ; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma
 ; ZVFHMIN-NEXT: vmset.m v7
 ; ZVFHMIN-NEXT: addi a3, sp, 16
 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 1a1472fcfc66f5..fa67fa38144aae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -111,6 +111,7 @@ define @different_vl_with_ta( %a, @different_vl_with_tu( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: different_vl_with_tu:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v14, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
 ; CHECK-NEXT: vadd.vv v14, v10, v12
@@ -126,8 +127,8 @@ define @different_vl_with_tu( %passthru, @different_imm_vl_with_tu( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: different_imm_vl_with_tu:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv2r.v v14, v10
 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
+; CHECK-NEXT: vmv2r.v v14, v10
 ; CHECK-NEXT: vadd.vv v14, v10, v12
 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
 ; CHECK-NEXT: vadd.vv v8, v14, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
index 1516d656663b6b..2962f796cf10c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
@@ -51,6 +51,7 @@ entry:
 define @test_vlseg2ff_mask_dead_vl(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2ff_mask_dead_vl:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
index b89097b8ff9744..4ec7dae561fad4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
@@ -25,6 +25,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
@@ -64,6 +65,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
@@ -103,6 +105,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
@@ -142,6 +145,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
@@ -181,6 +185,7 @@ entry:
 define @test_vlseg2ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
@@ -220,6 +225,7 @@ entry:
 define @test_vlseg2ff_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
@@ -259,6 +265,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -299,6 +306,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -339,6 +347,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -379,6 +388,7 @@ entry:
 define @test_vlseg3ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -419,6 +429,7 @@ entry:
 define @test_vlseg3ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -459,6 +470,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -500,6 +512,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -541,6 +554,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -582,6 +596,7 @@ entry:
 define @test_vlseg4ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -623,6 +638,7 @@ entry:
 define @test_vlseg4ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -664,6 +680,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -706,6 +723,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -748,6 +766,7 @@ entry:
 define @test_vlseg5ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -790,6 +809,7 @@ entry:
 define @test_vlseg5ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -832,6 +852,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -875,6 +896,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -918,6 +940,7 @@ entry:
 define @test_vlseg6ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -961,6 +984,7 @@ entry:
 define @test_vlseg6ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1004,6 +1028,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1048,6 +1073,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1092,6 +1118,7 @@ entry:
 define @test_vlseg7ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1136,6 +1163,7 @@ entry:
 define @test_vlseg7ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1180,6 +1208,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1225,6 +1254,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1270,6 +1300,7 @@ entry:
 define @test_vlseg8ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1315,6 +1346,7 @@ entry:
 define @test_vlseg8ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1359,6 +1391,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
@@ -1397,6 +1430,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
@@ -1435,6 +1469,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
@@ -1473,6 +1508,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
@@ -1511,6 +1547,7 @@ entry:
 define @test_vlseg2ff_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
@@ -1549,6 +1586,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1588,6 +1626,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1627,6 +1666,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1666,6 +1706,7 @@ entry:
 define @test_vlseg3ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -1705,6 +1746,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1745,6 +1787,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1785,6 +1828,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1825,6 +1869,7 @@ entry:
 define @test_vlseg4ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -1865,6 +1910,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1906,6 +1952,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1947,6 +1994,7 @@ entry:
 define @test_vlseg5ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -1988,6 +2036,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2030,6 +2079,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2072,6 +2122,7 @@ entry:
 define @test_vlseg6ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2114,6 +2165,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2157,6 +2209,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2200,6 +2253,7 @@ entry:
 define @test_vlseg7ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2243,6 +2297,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2287,6 +2342,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2331,6 +2387,7 @@ entry:
 define @test_vlseg8ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2375,6 +2432,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
@@ -2413,6 +2471,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
@@ -2451,6 +2510,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
@@ -2489,6 +2549,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
@@ -2527,6 +2588,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2566,6 +2628,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2605,6 +2668,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -2644,6 +2708,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2684,6 +2749,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2724,6 +2790,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -2764,6 +2831,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2805,6 +2873,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2846,6 +2915,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2888,6 +2958,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2930,6 +3001,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2973,6 +3045,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3016,6 +3089,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3060,6 +3134,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3104,6 +3179,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
@@ -3142,6 +3218,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
@@ -3180,6 +3257,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
@@ -3218,6 +3296,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3257,6 +3336,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -3296,6 +3376,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3336,6 +3417,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -3376,6 +3458,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3417,6 +3500,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3459,6 +3543,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3502,6 +3587,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3545,6 +3631,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
@@ -3582,6 +3669,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
@@ -3619,6 +3707,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
@@ -3656,6 +3745,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
@@ -3693,6 +3783,7 @@ entry:
 define @test_vlseg2ff_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
@@ -3730,6 +3821,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3768,6 +3860,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3806,6 +3899,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3844,6 +3938,7 @@ entry:
 define @test_vlseg3ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -3882,6 +3977,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3921,6 +4017,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3960,6 +4057,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3999,6 +4097,7 @@ entry:
 define @test_vlseg4ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -4038,6 +4137,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4078,6 +4178,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4118,6 +4219,7 @@ entry:
 define @test_vlseg5ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4158,6 +4260,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4199,6 +4302,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4240,6 +4344,7 @@ entry:
 define @test_vlseg6ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4281,6 +4386,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4323,6 +4429,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4365,6 +4472,7 @@ entry:
 define @test_vlseg7ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4407,6 +4515,7 @@ entry:
@test_vlseg8ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4450,6 +4559,7 @@ entry: define @test_vlseg8ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4493,6 +4603,7 @@ entry: define @test_vlseg8ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4536,6 +4647,7 @@ entry: define @test_vlseg2ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -4573,6 +4685,7 @@ entry: define @test_vlseg2ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -4610,6 +4723,7 @@ entry: define @test_vlseg2ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -4647,6 +4761,7 @@ entry: define @test_vlseg2ff_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -4684,6 +4799,7 @@ entry: define @test_vlseg3ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4722,6 +4838,7 @@ entry: define @test_vlseg3ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: 
test_vlseg3ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4760,6 +4877,7 @@ entry: define @test_vlseg3ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -4798,6 +4916,7 @@ entry: define @test_vlseg4ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4837,6 +4956,7 @@ entry: define @test_vlseg4ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4876,6 +4996,7 @@ entry: define @test_vlseg4ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -4915,6 +5036,7 @@ entry: define @test_vlseg5ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4955,6 +5077,7 @@ entry: define @test_vlseg5ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -4995,6 +5118,7 @@ entry: define @test_vlseg6ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5036,6 +5160,7 @@ entry: define @test_vlseg6ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5077,6 +5202,7 @@ entry: define 
@test_vlseg7ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5119,6 +5245,7 @@ entry: define @test_vlseg7ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5161,6 +5288,7 @@ entry: define @test_vlseg8ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5204,6 +5332,7 @@ entry: define @test_vlseg8ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5247,6 +5376,7 @@ entry: define @test_vlseg2ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -5284,6 +5414,7 @@ entry: define @test_vlseg2ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -5321,6 +5452,7 @@ entry: define @test_vlseg2ff_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -5358,6 +5490,7 @@ entry: define @test_vlseg3ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5396,6 +5529,7 @@ entry: define @test_vlseg3ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: 
test_vlseg3ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -5434,6 +5568,7 @@ entry: define @test_vlseg4ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5473,6 +5608,7 @@ entry: define @test_vlseg4ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -5512,6 +5648,7 @@ entry: define @test_vlseg5ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5552,6 +5689,7 @@ entry: define @test_vlseg6ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5593,6 +5731,7 @@ entry: define @test_vlseg7ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5635,6 +5774,7 @@ entry: define @test_vlseg8ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5678,6 +5818,7 @@ entry: define @test_vlseg2ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -5715,6 +5856,7 @@ entry: define @test_vlseg2ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, 
mu @@ -5752,6 +5894,7 @@ entry: define @test_vlseg2ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -5789,6 +5932,7 @@ entry: define @test_vlseg2ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -5826,6 +5970,7 @@ entry: define @test_vlseg2ff_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -5863,6 +6008,7 @@ entry: define @test_vlseg3ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5901,6 +6047,7 @@ entry: define @test_vlseg3ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5939,6 +6086,7 @@ entry: define @test_vlseg3ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -5977,6 +6125,7 @@ entry: define @test_vlseg3ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -6015,6 +6164,7 @@ entry: define @test_vlseg4ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6054,6 +6204,7 @@ entry: define @test_vlseg4ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; 
CHECK-LABEL: test_vlseg4ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6093,6 +6244,7 @@ entry: define @test_vlseg4ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6132,6 +6284,7 @@ entry: define @test_vlseg4ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -6171,6 +6324,7 @@ entry: define @test_vlseg5ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6211,6 +6365,7 @@ entry: define @test_vlseg5ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6251,6 +6406,7 @@ entry: define @test_vlseg5ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6291,6 +6447,7 @@ entry: define @test_vlseg6ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6332,6 +6489,7 @@ entry: define @test_vlseg6ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6373,6 +6531,7 @@ entry: define @test_vlseg6ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -6414,6 
+6573,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6456,6 +6616,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6498,6 +6659,7 @@ entry:
 define @test_vlseg7ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6540,6 +6702,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6583,6 +6746,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6626,6 +6790,7 @@ entry:
 define @test_vlseg8ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
index 3dc0db90b6d854..e7d4195e2174f1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
@@ -51,6 +51,7 @@ entry:
 define @test_vlseg2ff_mask_dead_vl(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2ff_mask_dead_vl:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
index 68acb3beb06867..5da529a73a5240 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
@@ -25,6 +25,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
@@ -64,6 +65,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
@@ -103,6 +105,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
@@ -142,6 +145,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
@@ -181,6 +185,7 @@ entry:
 define @test_vlseg2ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
@@ -220,6 +225,7 @@ entry:
 define @test_vlseg2ff_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
@@ -259,6 +265,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -299,6 +306,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -339,6 +347,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v
v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -379,6 +388,7 @@ entry: define @test_vlseg3ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -419,6 +429,7 @@ entry: define @test_vlseg3ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -459,6 +470,7 @@ entry: define @test_vlseg4ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -500,6 +512,7 @@ entry: define @test_vlseg4ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -541,6 +554,7 @@ entry: define @test_vlseg4ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -582,6 +596,7 @@ entry: define @test_vlseg4ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -623,6 +638,7 @@ entry: define @test_vlseg4ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -664,6 +680,7 @@ entry: define @test_vlseg5ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -706,6 +723,7 @@ entry: define @test_vlseg5ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t: ; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -748,6 +766,7 @@ entry: define @test_vlseg5ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -790,6 +809,7 @@ entry: define @test_vlseg5ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -832,6 +852,7 @@ entry: define @test_vlseg6ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -875,6 +896,7 @@ entry: define @test_vlseg6ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -918,6 +940,7 @@ entry: define @test_vlseg6ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -961,6 +984,7 @@ entry: define @test_vlseg6ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1004,6 +1028,7 @@ entry: define @test_vlseg7ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1048,6 +1073,7 @@ entry: define @test_vlseg7ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1092,6 +1118,7 @@ entry: define @test_vlseg7ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, 
ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1136,6 +1163,7 @@ entry: define @test_vlseg7ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1180,6 +1208,7 @@ entry: define @test_vlseg8ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1225,6 +1254,7 @@ entry: define @test_vlseg8ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1270,6 +1300,7 @@ entry: define @test_vlseg8ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1315,6 +1346,7 @@ entry: define @test_vlseg8ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1359,6 +1391,7 @@ entry: define @test_vlseg2ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1397,6 +1430,7 @@ entry: define @test_vlseg2ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1435,6 +1469,7 @@ entry: define @test_vlseg2ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v 
v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1473,6 +1508,7 @@ entry: define @test_vlseg2ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1511,6 +1547,7 @@ entry: define @test_vlseg2ff_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1549,6 +1586,7 @@ entry: define @test_vlseg3ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1588,6 +1626,7 @@ entry: define @test_vlseg3ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1627,6 +1666,7 @@ entry: define @test_vlseg3ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1666,6 +1706,7 @@ entry: define @test_vlseg3ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -1705,6 +1746,7 @@ entry: define @test_vlseg4ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1745,6 +1787,7 @@ entry: define @test_vlseg4ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1785,6 +1828,7 @@ entry: define @test_vlseg4ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, 
ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1825,6 +1869,7 @@ entry: define @test_vlseg4ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -1865,6 +1910,7 @@ entry: define @test_vlseg5ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1906,6 +1952,7 @@ entry: define @test_vlseg5ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1947,6 +1994,7 @@ entry: define @test_vlseg5ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -1988,6 +2036,7 @@ entry: define @test_vlseg6ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2030,6 +2079,7 @@ entry: define @test_vlseg6ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2072,6 +2122,7 @@ entry: define @test_vlseg6ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2114,6 +2165,7 @@ entry: define @test_vlseg7ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2157,6 
+2209,7 @@ entry: define @test_vlseg7ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2200,6 +2253,7 @@ entry: define @test_vlseg7ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2243,6 +2297,7 @@ entry: define @test_vlseg8ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2287,6 +2342,7 @@ entry: define @test_vlseg8ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2331,6 +2387,7 @@ entry: define @test_vlseg8ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2375,6 +2432,7 @@ entry: define @test_vlseg2ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -2413,6 +2471,7 @@ entry: define @test_vlseg2ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -2451,6 +2510,7 @@ entry: define @test_vlseg2ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -2489,6 +2549,7 @@ entry: define @test_vlseg2ff_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: 
test_vlseg2ff_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -2527,6 +2588,7 @@ entry: define @test_vlseg3ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2566,6 +2628,7 @@ entry: define @test_vlseg3ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2605,6 +2668,7 @@ entry: define @test_vlseg3ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -2644,6 +2708,7 @@ entry: define @test_vlseg4ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2684,6 +2749,7 @@ entry: define @test_vlseg4ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2724,6 +2790,7 @@ entry: define @test_vlseg4ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: vmv2r.v v10, v12 @@ -2764,6 +2831,7 @@ entry: define @test_vlseg5ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2805,6 +2873,7 @@ entry: define @test_vlseg5ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -2846,6 
+2915,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2888,6 +2958,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2930,6 +3001,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -2973,6 +3045,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3016,6 +3089,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3060,6 +3134,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3104,6 +3179,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
@@ -3142,6 +3218,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
@@ -3180,6 +3257,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
@@ -3218,6 +3296,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3257,6 +3336,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -3296,6 +3376,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3336,6 +3417,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -3376,6 +3458,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3417,6 +3500,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3459,6 +3543,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3502,6 +3587,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3545,6 +3631,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
@@ -3582,6 +3669,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
@@ -3619,6 +3707,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
@@ -3656,6 +3745,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
@@ -3693,6 +3783,7 @@ entry:
 define @test_vlseg2ff_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
@@ -3730,6 +3821,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3768,6 +3860,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3806,6 +3899,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3844,6 +3938,7 @@ entry:
 define @test_vlseg3ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -3882,6 +3977,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3921,6 +4017,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3960,6 +4057,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -3999,6 +4097,7 @@ entry:
 define @test_vlseg4ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -4038,6 +4137,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4078,6 +4178,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4118,6 +4219,7 @@ entry:
 define @test_vlseg5ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4158,6 +4260,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4199,6 +4302,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4240,6 +4344,7 @@ entry:
 define @test_vlseg6ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4281,6 +4386,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4323,6 +4429,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4365,6 +4472,7 @@ entry:
 define @test_vlseg7ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4407,6 +4515,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4450,6 +4559,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4493,6 +4603,7 @@ entry:
 define @test_vlseg8ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4536,6 +4647,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
@@ -4573,6 +4685,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
@@ -4610,6 +4723,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
@@ -4647,6 +4761,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
@@ -4684,6 +4799,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4722,6 +4838,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4760,6 +4877,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -4798,6 +4916,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4837,6 +4956,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4876,6 +4996,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -4915,6 +5036,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4955,6 +5077,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -4995,6 +5118,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5036,6 +5160,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5077,6 +5202,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5119,6 +5245,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5161,6 +5288,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5204,6 +5332,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5247,6 +5376,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
@@ -5284,6 +5414,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
@@ -5321,6 +5452,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
@@ -5358,6 +5490,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5396,6 +5529,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -5434,6 +5568,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5473,6 +5608,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -5512,6 +5648,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5552,6 +5689,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5593,6 +5731,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5635,6 +5774,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5678,6 +5818,7 @@ entry:
 define @test_vlseg2ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
@@ -5715,6 +5856,7 @@ entry:
 define @test_vlseg2ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
@@ -5752,6 +5894,7 @@ entry:
 define @test_vlseg2ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
@@ -5789,6 +5932,7 @@ entry:
 define @test_vlseg2ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
@@ -5826,6 +5970,7 @@ entry:
 define @test_vlseg2ff_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vmv4r.v v8, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
@@ -5863,6 +6008,7 @@ entry:
 define @test_vlseg3ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5901,6 +6047,7 @@ entry:
 define @test_vlseg3ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5939,6 +6086,7 @@ entry:
 define @test_vlseg3ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -5977,6 +6125,7 @@ entry:
 define @test_vlseg3ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -6015,6 +6164,7 @@ entry:
 define @test_vlseg4ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6054,6 +6204,7 @@ entry:
 define @test_vlseg4ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6093,6 +6244,7 @@ entry:
 define @test_vlseg4ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6132,6 +6284,7 @@ entry:
 define @test_vlseg4ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: vmv2r.v v10, v12
@@ -6171,6 +6324,7 @@ entry:
 define @test_vlseg5ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6211,6 +6365,7 @@ entry:
 define @test_vlseg5ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6251,6 +6406,7 @@ entry:
 define @test_vlseg5ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", , 5) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6291,6 +6447,7 @@ entry:
 define @test_vlseg6ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6332,6 +6489,7 @@ entry:
 define @test_vlseg6ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6373,6 +6531,7 @@ entry:
 define @test_vlseg6ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6414,6 +6573,7 @@ entry:
 define @test_vlseg7ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6456,6 +6616,7 @@ entry:
 define @test_vlseg7ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6498,6 +6659,7 @@ entry:
 define @test_vlseg7ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", , 7) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6540,6 +6702,7 @@ entry:
 define @test_vlseg8ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6583,6 +6746,7 @@ entry:
 define @test_vlseg8ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
@@ -6626,6 +6790,7 @@ entry:
 define @test_vlseg8ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: vmv1r.v v9, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index 0b553d3cd6fdf4..b839cd595f3bef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -412,8 +412,8 @@ declare @llvm.vp.smax.nxv128i8(,
 define @vmax_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmax_vx_nxv128i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vlm.v v0, (a1)
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -974,11 +974,11 @@ declare @llvm.vp.smax.nxv32i32(,
 define @vmax_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmax_vx_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: srli a3, a2, 2
 ; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
@@ -1034,11 +1034,11 @@ declare i32 @llvm.vscale.i32()
 define @vmax_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) {
 ; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a3, a1, 2
 ; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index f6be882f742062..99e0dfaf90a2d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -410,8 +410,8 @@ declare @llvm.vp.umax.nxv128i8(,
 define @vmaxu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmaxu_vx_nxv128i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vlm.v v0, (a1)
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -972,11 +972,11 @@ declare @llvm.vp.umax.nxv32i32(,
 define @vmaxu_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmaxu_vx_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: srli a3, a2, 2
 ; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
@@ -1032,11 +1032,11 @@ declare i32 @llvm.vscale.i32()
 define @vmaxu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) {
 ; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a3, a1, 2
 ; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
index 3ebfc68ddee4b9..7216789241f9c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmfeq.mask.nxv1f16(
 define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v9
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16(
 define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v9
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16(
 define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v9
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmfeq.mask.nxv8f16(
 define @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v10
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmfeq.mask.nxv16f16(
 define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v12
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmfeq.mask.nxv1f32(
 define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v9
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmfeq.mask.nxv2f32(
 define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v9
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmfeq.mask.nxv4f32(
 define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v10
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmfeq.mask.nxv8f32(
 define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v12
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64(
 define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v9
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64(
 define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v10
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64(
 define @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfeq.vv v0, v8, v12
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmfeq.mask.nxv1f16.f16(
 define @intrinsic_vmfeq_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -693,6 +706,7 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16.f16(
 define @intrinsic_vmfeq_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -740,6 +754,7 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16.f16(
 define @intrinsic_vmfeq_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -787,6 +802,7 @@ declare @llvm.riscv.vmfeq.mask.nxv8f16.f16(
 define @intrinsic_vmfeq_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -834,6 +850,7 @@ declare @llvm.riscv.vmfeq.mask.nxv16f16.f16(
 define @intrinsic_vmfeq_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -881,6 +898,7 @@ declare @llvm.riscv.vmfeq.mask.nxv1f32.f32(
 define @intrinsic_vmfeq_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -928,6 +946,7 @@ declare @llvm.riscv.vmfeq.mask.nxv2f32.f32(
 define @intrinsic_vmfeq_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -975,6 +994,7 @@ declare @llvm.riscv.vmfeq.mask.nxv4f32.f32(
 define @intrinsic_vmfeq_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -1022,6 +1042,7 @@ declare @llvm.riscv.vmfeq.mask.nxv8f32.f32(
 define @intrinsic_vmfeq_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -1069,6 +1090,7 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64.f64(
 define @intrinsic_vmfeq_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -1116,6 +1138,7 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64.f64(
 define @intrinsic_vmfeq_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -1163,6 +1186,7 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64.f64(
 define @intrinsic_vmfeq_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
index e041e5874a8dc7..c50653730b38ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmfge.mask.nxv1f16(
 define @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v9, v8
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmfge.mask.nxv2f16(
 define @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v9, v8
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmfge.mask.nxv4f16(
 define @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v9, v8
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmfge.mask.nxv8f16(
 define @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v10, v8
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmfge.mask.nxv16f16(
 define @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v12, v8
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmfge.mask.nxv1f32(
 define @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v9, v8
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmfge.mask.nxv2f32(
 define @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v9, v8
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmfge.mask.nxv4f32(
 define @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v10, v8
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmfge.mask.nxv8f32(
 define @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v12, v8
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmfge.mask.nxv1f64(
 define @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v9, v8
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmfge.mask.nxv2f64(
 define @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v10, v8
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmfge.mask.nxv4f64(
 define @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v12, v8
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmfge.mask.nxv1f16.f16(
 define @intrinsic_vmfge_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -693,6 +706,7 @@ declare @llvm.riscv.vmfge.mask.nxv2f16.f16(
 define @intrinsic_vmfge_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -740,6 +754,7 @@ declare @llvm.riscv.vmfge.mask.nxv4f16.f16(
 define @intrinsic_vmfge_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -787,6 +802,7 @@ declare @llvm.riscv.vmfge.mask.nxv8f16.f16(
 define @intrinsic_vmfge_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -834,6 +850,7 @@ declare @llvm.riscv.vmfge.mask.nxv16f16.f16(
 define @intrinsic_vmfge_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -881,6 +898,7 @@ declare @llvm.riscv.vmfge.mask.nxv1f32.f32(
 define @intrinsic_vmfge_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -928,6 +946,7 @@ declare @llvm.riscv.vmfge.mask.nxv2f32.f32(
 define @intrinsic_vmfge_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -975,6 +994,7 @@ declare @llvm.riscv.vmfge.mask.nxv4f32.f32(
 define @intrinsic_vmfge_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -1022,6 +1042,7 @@ declare @llvm.riscv.vmfge.mask.nxv8f32.f32(
 define @intrinsic_vmfge_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -1069,6 +1090,7 @@ declare @llvm.riscv.vmfge.mask.nxv1f64.f64(
 define @intrinsic_vmfge_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -1116,6 +1138,7 @@ declare @llvm.riscv.vmfge.mask.nxv2f64.f64(
 define @intrinsic_vmfge_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -1163,6 +1186,7 @@ declare @llvm.riscv.vmfge.mask.nxv4f64.f64(
 define @intrinsic_vmfge_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
index 0faaf4ebf255d4..6370e8287c77e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16(
 define @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v9, v8
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16(
 define @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v9, v8
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16(
 define @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v9, v8
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16(
 define @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v10, v8
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16(
 define @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v12, v8
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32(
 define @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v9, v8
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32(
 define @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v9, v8
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32(
 define @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v10, v8
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32(
 define @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v12, v8
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64(
 define @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v9, v8
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64(
 define @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v10, v8
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64(
 define @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmflt.vv v0, v12, v8
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16.f16(
 define @intrinsic_vmfgt_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -693,6 +706,7 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16.f16(
 define @intrinsic_vmfgt_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -740,6 +754,7 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16.f16(
 define @intrinsic_vmfgt_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -787,6 +802,7 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16.f16(
 define @intrinsic_vmfgt_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -834,6 +850,7 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16.f16(
 define @intrinsic_vmfgt_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -881,6 +898,7 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32.f32(
 define @intrinsic_vmfgt_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -928,6 +946,7 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32.f32(
 define @intrinsic_vmfgt_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -975,6 +994,7 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32.f32(
 define @intrinsic_vmfgt_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -1022,6 +1042,7 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32.f32(
 define @intrinsic_vmfgt_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -1069,6 +1090,7 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64.f64(
 define @intrinsic_vmfgt_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -1116,6 +1138,7 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64.f64(
 define @intrinsic_vmfgt_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -1163,6 +1186,7 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64.f64(
 define @intrinsic_vmfgt_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
index ef5de6bc3481fb..d9ccc19b62095f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmfle.mask.nxv1f16(
 define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v8, v9
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmfle.mask.nxv2f16(
 define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v8, v9
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmfle.mask.nxv4f16(
 define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v8, v9
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmfle.mask.nxv8f16(
 define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v8, v10
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmfle.mask.nxv16f16(
 define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v8, v12
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmfle.mask.nxv1f32(
 define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmfle.vv v0, v8, v9
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmfle.mask.nxv2f32(
 define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: #
%entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v9 @@ -391,6 +398,7 @@ declare @llvm.riscv.vmfle.mask.nxv4f32( define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 @@ -442,6 +450,7 @@ declare @llvm.riscv.vmfle.mask.nxv8f32( define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmfle.mask.nxv1f64( define @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v9 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmfle.mask.nxv2f64( define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 @@ -595,6 +606,7 @@ declare @llvm.riscv.vmfle.mask.nxv4f64( define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmfle.mask.nxv1f16.f16( define @intrinsic_vmfle_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -693,6 +706,7 @@ declare @llvm.riscv.vmfle.mask.nxv2f16.f16( define @intrinsic_vmfle_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -740,6 +754,7 @@ declare @llvm.riscv.vmfle.mask.nxv4f16.f16( define @intrinsic_vmfle_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -787,6 +802,7 @@ declare @llvm.riscv.vmfle.mask.nxv8f16.f16( define @intrinsic_vmfle_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; 
CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -834,6 +850,7 @@ declare @llvm.riscv.vmfle.mask.nxv16f16.f16( define @intrinsic_vmfle_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -881,6 +898,7 @@ declare @llvm.riscv.vmfle.mask.nxv1f32.f32( define @intrinsic_vmfle_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -928,6 +946,7 @@ declare @llvm.riscv.vmfle.mask.nxv2f32.f32( define @intrinsic_vmfle_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -975,6 +994,7 @@ declare @llvm.riscv.vmfle.mask.nxv4f32.f32( define @intrinsic_vmfle_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1022,6 +1042,7 @@ declare @llvm.riscv.vmfle.mask.nxv8f32.f32( define @intrinsic_vmfle_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1069,6 +1090,7 @@ declare @llvm.riscv.vmfle.mask.nxv1f64.f64( define @intrinsic_vmfle_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1116,6 +1138,7 @@ declare @llvm.riscv.vmfle.mask.nxv2f64.f64( define @intrinsic_vmfle_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1163,6 +1186,7 @@ declare @llvm.riscv.vmfle.mask.nxv4f64.f64( define @intrinsic_vmfle_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 0b7740d5e00457..d6b2163a82ca08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -34,6 +34,7 @@ declare @llvm.riscv.vmflt.mask.nxv1f16( define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v9 @@ -85,6 +86,7 @@ declare @llvm.riscv.vmflt.mask.nxv2f16( define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v9 @@ -136,6 +138,7 @@ declare @llvm.riscv.vmflt.mask.nxv4f16( define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v9 @@ -187,6 +190,7 @@ declare @llvm.riscv.vmflt.mask.nxv8f16( define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 @@ -238,6 +242,7 @@ declare @llvm.riscv.vmflt.mask.nxv16f16( define @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 @@ -289,6 +294,7 @@ declare @llvm.riscv.vmflt.mask.nxv1f32( define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v9 @@ -340,6 +346,7 @@ declare @llvm.riscv.vmflt.mask.nxv2f32( define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v9 @@ -391,6 +398,7 @@ declare @llvm.riscv.vmflt.mask.nxv4f32( define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 @@ -442,6 +450,7 @@ declare @llvm.riscv.vmflt.mask.nxv8f32( define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmflt.mask.nxv1f64( define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v9 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmflt.mask.nxv2f64( define @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 @@ -595,6 +606,7 @@ declare @llvm.riscv.vmflt.mask.nxv4f64( define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmflt.mask.nxv1f16.f16( define @intrinsic_vmflt_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -693,6 +706,7 @@ declare @llvm.riscv.vmflt.mask.nxv2f16.f16( define @intrinsic_vmflt_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -740,6 +754,7 @@ declare @llvm.riscv.vmflt.mask.nxv4f16.f16( define @intrinsic_vmflt_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -787,6 +802,7 @@ declare @llvm.riscv.vmflt.mask.nxv8f16.f16( define @intrinsic_vmflt_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -834,6 +850,7 @@ declare @llvm.riscv.vmflt.mask.nxv16f16.f16( define @intrinsic_vmflt_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -881,6 +898,7 @@ declare @llvm.riscv.vmflt.mask.nxv1f32.f32( define @intrinsic_vmflt_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -928,6 +946,7 @@ declare @llvm.riscv.vmflt.mask.nxv2f32.f32( define @intrinsic_vmflt_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -975,6 +994,7 @@ declare @llvm.riscv.vmflt.mask.nxv4f32.f32( define @intrinsic_vmflt_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1022,6 +1042,7 @@ declare @llvm.riscv.vmflt.mask.nxv8f32.f32( define @intrinsic_vmflt_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1069,6 +1090,7 @@ declare @llvm.riscv.vmflt.mask.nxv1f64.f64( define @intrinsic_vmflt_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1116,6 +1138,7 @@ declare @llvm.riscv.vmflt.mask.nxv2f64.f64( define @intrinsic_vmflt_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1163,6 +1186,7 @@ declare @llvm.riscv.vmflt.mask.nxv4f64.f64( define @intrinsic_vmflt_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll index 65a04e504a973b..e93f6cec3971ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll @@ -34,6 +34,7 @@ declare @llvm.riscv.vmfne.mask.nxv1f16( define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v9 @@ -85,6 +86,7 @@ declare @llvm.riscv.vmfne.mask.nxv2f16( define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v9 @@ -136,6 +138,7 @@ declare @llvm.riscv.vmfne.mask.nxv4f16( define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v9 @@ -187,6 +190,7 @@ declare @llvm.riscv.vmfne.mask.nxv8f16( 
 define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v10
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmfne.mask.nxv16f16(
 define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v12
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmfne.mask.nxv1f32(
 define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v9
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmfne.mask.nxv2f32(
 define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v9
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmfne.mask.nxv4f32(
 define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v10
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmfne.mask.nxv8f32(
 define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v12
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmfne.mask.nxv1f64(
 define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v9
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmfne.mask.nxv2f64(
 define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v10
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmfne.mask.nxv4f64(
 define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmfne.vv v0, v8, v12
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmfne.mask.nxv1f16.f16(
 define @intrinsic_vmfne_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -693,6 +706,7 @@ declare @llvm.riscv.vmfne.mask.nxv2f16.f16(
 define @intrinsic_vmfne_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -740,6 +754,7 @@ declare @llvm.riscv.vmfne.mask.nxv4f16.f16(
 define @intrinsic_vmfne_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -787,6 +802,7 @@ declare @llvm.riscv.vmfne.mask.nxv8f16.f16(
 define @intrinsic_vmfne_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -834,6 +850,7 @@ declare @llvm.riscv.vmfne.mask.nxv16f16.f16(
 define @intrinsic_vmfne_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -881,6 +898,7 @@ declare @llvm.riscv.vmfne.mask.nxv1f32.f32(
 define @intrinsic_vmfne_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -928,6 +946,7 @@ declare @llvm.riscv.vmfne.mask.nxv2f32.f32(
 define @intrinsic_vmfne_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -975,6 +994,7 @@ declare @llvm.riscv.vmfne.mask.nxv4f32.f32(
 define @intrinsic_vmfne_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -1022,6 +1042,7 @@ declare @llvm.riscv.vmfne.mask.nxv8f32.f32(
 define @intrinsic_vmfne_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -1069,6 +1090,7 @@ declare @llvm.riscv.vmfne.mask.nxv1f64.f64(
 define @intrinsic_vmfne_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -1116,6 +1138,7 @@ declare @llvm.riscv.vmfne.mask.nxv2f64.f64(
 define @intrinsic_vmfne_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -1163,6 +1186,7 @@ declare @llvm.riscv.vmfne.mask.nxv4f64.f64(
 define @intrinsic_vmfne_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index 8690014cc2c9df..3441934fb1550e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -412,8 +412,8 @@ declare @llvm.vp.smin.nxv128i8(,
 @vmin_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmin_vx_nxv128i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vlm.v v0, (a1)
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -974,11 +974,11 @@ declare @llvm.vp.smin.nxv32i32(,
 @vmin_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmin_vx_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: srli a3, a2, 2
 ; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
@@ -1034,11 +1034,11 @@ declare i32 @llvm.vscale.i32()
 define @vmin_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) {
 ; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a3, a1, 2
 ; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 414807829d5630..bbf4c886bfe9fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -410,8 +410,8 @@ declare @llvm.vp.umin.nxv128i8(,
 @vminu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vminu_vx_nxv128i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vlm.v v0, (a1)
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -972,11 +972,11 @@ declare @llvm.vp.umin.nxv32i32(,
 @vminu_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vminu_vx_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: srli a3, a2, 2
 ; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
@@ -1032,11 +1032,11 @@ declare i32 @llvm.vscale.i32()
 define @vminu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) {
 ; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a3, a1, 2
 ; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v0, a3
 ; CHECK-NEXT: sub a3, a1, a2
 ; CHECK-NEXT: sltu a4, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
index d1f344d52763db..023e0ffdea0c24 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
@@ -31,6 +31,7 @@ declare @llvm.riscv.vmsbf.mask.nxv1i1(
 define @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
@@ -73,6 +74,7 @@ declare @llvm.riscv.vmsbf.mask.nxv2i1(
 define @intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
@@ -115,6 +117,7 @@ declare @llvm.riscv.vmsbf.mask.nxv4i1(
 define @intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
@@ -157,6 +160,7 @@ declare @llvm.riscv.vmsbf.mask.nxv8i1(
 define @intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
@@ -199,6 +203,7 @@ declare @llvm.riscv.vmsbf.mask.nxv16i1(
 define @intrinsic_vmsbf_mask_m_nxv16i1_nxv16i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv16i1_nxv16i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
@@ -241,6 +246,7 @@ declare @llvm.riscv.vmsbf.mask.nxv32i1(
 define @intrinsic_vmsbf_mask_m_nxv32i1_nxv32i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv32i1_nxv32i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
@@ -283,6 +289,7 @@ declare @llvm.riscv.vmsbf.mask.nxv64i1(
 define @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
index 1fd2383c40d185..4ba813e88faf07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i8(
 define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i8(
 define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i8(
 define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmseq.mask.nxv8i8(
 define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmseq.mask.nxv16i8(
 define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v10
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmseq.mask.nxv32i8(
 define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v12
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i16(
 define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i16(
 define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i16(
 define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmseq.mask.nxv8i16(
 define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v10
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmseq.mask.nxv16i16(
 define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v12
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i32(
 define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i32(
 define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -697,6 +710,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i32(
 define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v10
@@ -748,6 +762,7 @@ declare @llvm.riscv.vmseq.mask.nxv8i32(
 define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v12
@@ -799,6 +814,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i64(
 define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v9
@@ -850,6 +866,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i64(
 define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v10
@@ -901,6 +918,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i64(
 define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmseq.vv v0, v8, v12
@@ -952,6 +970,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i8.i8(
 define @intrinsic_vmseq_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
@@ -999,6 +1018,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i8.i8(
 define @intrinsic_vmseq_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
@@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i8.i8(
 define @intrinsic_vmseq_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
@@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmseq.mask.nxv8i8.i8(
 define @intrinsic_vmseq_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
@@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmseq.mask.nxv16i8.i8(
 define @intrinsic_vmseq_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
@@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmseq.mask.nxv32i8.i8(
 define @intrinsic_vmseq_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
@@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i16.i16(
 define @intrinsic_vmseq_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
@@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i16.i16(
 define @intrinsic_vmseq_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
@@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i16.i16(
 define @intrinsic_vmseq_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
@@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmseq.mask.nxv8i16.i16(
 define @intrinsic_vmseq_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
@@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmseq.mask.nxv16i16.i16(
 define @intrinsic_vmseq_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
@@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmseq.mask.nxv1i32.i32(
 define @intrinsic_vmseq_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
@@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmseq.mask.nxv2i32.i32(
 define @intrinsic_vmseq_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
@@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmseq.mask.nxv4i32.i32(
 define @intrinsic_vmseq_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
@@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmseq.mask.nxv8i32.i32(
 define @intrinsic_vmseq_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
@@ -1684,6 +1717,7 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64(
 ;
 ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v10, v0
 ; RV64-NEXT: vmv1r.v v0, v9
 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
@@ -1758,6 +1792,7 @@ define @intrinsic_vmseq_mask_vx_nxv2i64_i64(
 ;
 ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v11, v0
 ; RV64-NEXT: vmv1r.v v0, v10
 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
@@ -1832,6 +1867,7 @@ define @intrinsic_vmseq_mask_vx_nxv4i64_i64(
 ;
 ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v13, v0
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
@@ -1867,6 +1903,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
@@ -1902,6 +1939,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
@@ -1937,6 +1975,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
@@ -1972,6 +2011,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
@@ -2007,6 +2047,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
@@ -2042,6 +2083,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
@@ -2077,6 +2119,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -2112,6 +2155,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -2147,6 +2191,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -2182,6 +2227,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -2217,6 +2263,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -2252,6 +2299,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -2287,6 +2335,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -2322,6 +2371,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -2357,6 +2407,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -2392,6 +2443,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -2427,6 +2479,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -2462,6 +2515,7 @@ entry:
 define @intrinsic_vmseq_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index 2dc133d169f0a8..62d4a6df5ddd81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i8(
 define @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v9, v8
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i8(
 define @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v9, v8
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i8(
 define @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v9, v8
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmsge.mask.nxv8i8(
define @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -238,6 +242,7 @@ declare @llvm.riscv.vmsge.mask.nxv16i8( define @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v10, v8 @@ -289,6 +294,7 @@ declare @llvm.riscv.vmsge.mask.nxv32i8( define @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v12, v8 @@ -340,6 +346,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i16( define @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -391,6 +398,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i16( define @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -442,6 +450,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i16( define @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmsge.mask.nxv8i16( define @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v10, v8 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmsge.mask.nxv16i16( define @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v12, v8 @@ -595,6 +606,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i32( define @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i32( define @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32( 
%0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -697,6 +710,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i32( define @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v10, v8 @@ -748,6 +762,7 @@ declare @llvm.riscv.vmsge.mask.nxv8i32( define @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v12, v8 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i64( define @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsle.vv v0, v9, v8 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i64( define @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v10, v8 @@ -901,6 +918,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i64( define @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v12, v8 @@ -953,6 +971,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i8.i8( define @intrinsic_vmsge_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -1001,6 +1020,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i8.i8( define @intrinsic_vmsge_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1049,6 +1069,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i8.i8( define @intrinsic_vmsge_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1069,6 +1090,7 @@ entry: define @intrinsic_vmsge_mask_vx_nxv4i8_i8_1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i8_i8_1: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: li a1, 99 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -1136,6 +1158,7 @@ declare @llvm.riscv.vmsge.mask.nxv8i8.i8( define @intrinsic_vmsge_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1184,6 +1207,7 @@ declare @llvm.riscv.vmsge.mask.nxv16i8.i8( define @intrinsic_vmsge_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1232,6 +1256,7 @@ declare @llvm.riscv.vmsge.mask.nxv32i8.i8( define @intrinsic_vmsge_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1280,6 +1305,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i16.i16( define @intrinsic_vmsge_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i16.i16( define @intrinsic_vmsge_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1376,6 +1403,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i16.i16( define @intrinsic_vmsge_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1424,6 +1452,7 @@ declare @llvm.riscv.vmsge.mask.nxv8i16.i16( define @intrinsic_vmsge_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1472,6 +1501,7 @@ declare @llvm.riscv.vmsge.mask.nxv16i16.i16( define @intrinsic_vmsge_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1520,6 +1550,7 @@ declare @llvm.riscv.vmsge.mask.nxv1i32.i32( define @intrinsic_vmsge_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; 
CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1568,6 +1599,7 @@ declare @llvm.riscv.vmsge.mask.nxv2i32.i32( define @intrinsic_vmsge_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1616,6 +1648,7 @@ declare @llvm.riscv.vmsge.mask.nxv4i32.i32( define @intrinsic_vmsge_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1664,6 +1697,7 @@ declare @llvm.riscv.vmsge.mask.nxv8i32.i32( define @intrinsic_vmsge_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1739,6 +1773,7 @@ define @intrinsic_vmsge_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1814,6 +1849,7 @@ define @intrinsic_vmsge_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1889,6 +1925,7 @@ define @intrinsic_vmsge_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1924,6 +1961,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1959,6 +1997,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -2025,6 +2064,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -2060,6 +2100,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2095,6 +2136,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; 
CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2130,6 +2172,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2165,6 +2208,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2200,6 +2244,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2235,6 +2280,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2270,6 +2316,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2305,6 +2352,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2340,6 +2388,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2375,6 +2424,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2410,6 +2460,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -2445,6 +2496,7 @@ entry: define @intrinsic_vmsge_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: 
vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -2480,6 +2532,7 @@ entry:
 define @intrinsic_vmsge_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -2515,6 +2568,7 @@ entry:
 define @intrinsic_vmsge_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -2550,6 +2604,7 @@ entry:
 define @intrinsic_vmsge_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index 69a3835cd4d678..ab0e75ef09e53e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8(
 define @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8(
 define @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8(
 define @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8(
 define @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8(
 define @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v10, v8
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv32i8(
 define @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v12, v8
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16(
 define @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16(
 define @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16(
 define @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16(
 define @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v10, v8
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16(
 define @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v12, v8
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32(
 define @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32(
 define @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v9, v8
@@ -697,6 +710,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32(
 define @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmsleu.vv v0, v10, v8
@@ -748,6 +762,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32(
 define @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v12, v8 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i64( define @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v9, v8 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i64( define @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v10, v8 @@ -901,6 +918,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i64( define @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v12, v8 @@ -953,6 +971,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8.i8( define @intrinsic_vmsgeu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -1001,6 +1020,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8.i8( define @intrinsic_vmsgeu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1049,6 +1069,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8.i8( define @intrinsic_vmsgeu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1097,6 +1118,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8.i8( define @intrinsic_vmsgeu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1145,6 +1167,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8.i8( define @intrinsic_vmsgeu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1193,6 +1216,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv32i8.i8( define @intrinsic_vmsgeu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; 
CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1241,6 +1265,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16.i16( define @intrinsic_vmsgeu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1289,6 +1314,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16.i16( define @intrinsic_vmsgeu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1337,6 +1363,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16.i16( define @intrinsic_vmsgeu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1385,6 +1412,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16.i16( define @intrinsic_vmsgeu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1433,6 +1461,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16.i16( define @intrinsic_vmsgeu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1481,6 +1510,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32.i32( define @intrinsic_vmsgeu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1529,6 +1559,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32.i32( define @intrinsic_vmsgeu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1577,6 +1608,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32.i32( define @intrinsic_vmsgeu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1625,6 +1657,7 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32.i32( define @intrinsic_vmsgeu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, 
mu @@ -1700,6 +1733,7 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1775,6 +1809,7 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1850,6 +1885,7 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1885,6 +1921,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1920,6 +1957,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1955,6 +1993,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1975,6 +2014,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv4i8_i8_1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i8_i8_1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: li a1, 99 ; CHECK-NEXT: vmv1r.v v0, v9 @@ -2011,6 +2051,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2046,6 +2087,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2081,6 +2123,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2116,6 +2159,7 @@ entry: define @intrinsic_vmsgeu_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; 
CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -2214,6 +2258,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i16_i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -2249,6 +2294,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i16_i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -2284,6 +2330,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i16_i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -2319,6 +2366,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -2354,6 +2402,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -2389,6 +2438,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -2424,6 +2474,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -2459,6 +2510,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -2494,6 +2546,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -2529,6 +2582,7 @@ entry:
 define @intrinsic_vmsgeu_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
index d7dee2e1bc580e..a0b1ac655a0dd2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8(
 define @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v9, v8
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8(
 define @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v9, v8
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8(
 define @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v9, v8
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8(
 define @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v9, v8
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8(
 define @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v10, v8
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8(
 define @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v12, v8
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16(
 define @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v9, v8
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16(
 define @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v9, v8
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16(
 define @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:
vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmslt.vv v0, v9, v8 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmsgt.mask.nxv8i16( define @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16( define @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 @@ -595,6 +606,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i32( define @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v9, v8 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i32( define @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmslt.vv v0, v9, v8 @@ -697,6 +710,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i32( define @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 @@ -748,6 +762,7 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32( define @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i64( define @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmslt.vv v0, v9, v8 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i64( define @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 @@ -901,6 +918,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i64( define @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli 
zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 @@ -952,6 +970,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8.i8( define @intrinsic_vmsgt_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -999,6 +1018,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8.i8( define @intrinsic_vmsgt_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8.i8( define @intrinsic_vmsgt_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8.i8( define @intrinsic_vmsgt_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8.i8( define @intrinsic_vmsgt_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8.i8( define @intrinsic_vmsgt_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16.i16( define @intrinsic_vmsgt_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16.i16( define @intrinsic_vmsgt_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16.i16( define @intrinsic_vmsgt_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1375,6 +1402,7 @@ declare 
@llvm.riscv.vmsgt.mask.nxv8i16.i16( define @intrinsic_vmsgt_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16.i16( define @intrinsic_vmsgt_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmsgt.mask.nxv1i32.i32( define @intrinsic_vmsgt_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmsgt.mask.nxv2i32.i32( define @intrinsic_vmsgt_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmsgt.mask.nxv4i32.i32( define @intrinsic_vmsgt_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32.i32( define @intrinsic_vmsgt_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1684,6 +1717,7 @@ define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1758,6 +1792,7 @@ define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1832,6 +1867,7 @@ define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1867,6 +1903,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1902,6 +1939,7 @@ entry: 
define @intrinsic_vmsgt_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1937,6 +1975,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1972,6 +2011,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2007,6 +2047,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2042,6 +2083,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2077,6 +2119,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2112,6 +2155,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2147,6 +2191,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2182,6 +2227,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2217,6 +2263,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2252,6 +2299,7 @@ entry: define @intrinsic_vmsgt_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # 
%entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -2287,6 +2335,7 @@ entry:
 define @intrinsic_vmsgt_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -2322,6 +2371,7 @@ entry:
 define @intrinsic_vmsgt_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -2357,6 +2407,7 @@ entry:
 define @intrinsic_vmsgt_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -2392,6 +2443,7 @@ entry:
 define @intrinsic_vmsgt_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -2427,6 +2479,7 @@ entry:
 define @intrinsic_vmsgt_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -2462,6 +2515,7 @@ entry:
 define @intrinsic_vmsgt_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
index fe9d522f6b401b..c29df31a3e6008 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8(
 define @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8(
 define @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8(
 define @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8(
 define @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8(
 define @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v10, v8
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8(
 define @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v12, v8
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16(
 define @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16(
 define @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16(
 define @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v9, v8
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16(
 define @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v10, v8
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16(
 define @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v20, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmsltu.vv v0, v12, v8
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32(
 define @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ;
CHECK-NEXT: vmsltu.vv v0, v9, v8 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32( define @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v9, v8 @@ -697,6 +710,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32( define @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 @@ -748,6 +762,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32( define @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i64( define @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v9, v8 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i64( define @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 @@ -901,6 +918,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i64( define @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 @@ -952,6 +970,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8.i8( define @intrinsic_vmsgtu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -999,6 +1018,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8.i8( define @intrinsic_vmsgtu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8.i8( define @intrinsic_vmsgtu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1093,6 
+1114,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8.i8( define @intrinsic_vmsgtu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8.i8( define @intrinsic_vmsgtu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8.i8( define @intrinsic_vmsgtu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16.i16( define @intrinsic_vmsgtu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16.i16( define @intrinsic_vmsgtu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16.i16( define @intrinsic_vmsgtu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16.i16( define @intrinsic_vmsgtu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16.i16( define @intrinsic_vmsgtu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32.i32( define @intrinsic_vmsgtu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32.i32( 
define @intrinsic_vmsgtu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32.i32( define @intrinsic_vmsgtu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32.i32( define @intrinsic_vmsgtu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1684,6 +1717,7 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1758,6 +1792,7 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1832,6 +1867,7 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1867,6 +1903,7 @@ entry: define @intrinsic_vmsgtu_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1902,6 +1939,7 @@ entry: define @intrinsic_vmsgtu_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1937,6 +1975,7 @@ entry: define @intrinsic_vmsgtu_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1972,6 +2011,7 @@ entry: define @intrinsic_vmsgtu_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2007,6 +2047,7 @@ entry: define @intrinsic_vmsgtu_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; 
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
@@ -2042,6 +2083,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv32i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
@@ -2077,6 +2119,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -2112,6 +2155,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -2147,6 +2191,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -2182,6 +2227,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -2217,6 +2263,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -2252,6 +2299,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -2287,6 +2335,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -2322,6 +2371,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -2357,6 +2407,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -2392,6 +2443,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv1i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -2427,6 +2479,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -2462,6 +2515,7 @@ entry:
 define @intrinsic_vmsgtu_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
index 1dc52eb55455ba..b666ab5ab87b1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
@@ -31,6 +31,7 @@ declare @llvm.riscv.vmsif.mask.nxv1i1(
 define @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
@@ -73,6 +74,7 @@ declare @llvm.riscv.vmsif.mask.nxv2i1(
 define @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
@@ -115,6 +117,7 @@ declare @llvm.riscv.vmsif.mask.nxv4i1(
 define @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
@@ -157,6 +160,7 @@ declare @llvm.riscv.vmsif.mask.nxv8i1(
 define @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv8i1_nxv8i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
@@ -199,6 +203,7 @@ declare @llvm.riscv.vmsif.mask.nxv16i1(
 define @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv16i1_nxv16i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
@@ -241,6 +246,7 @@ declare @llvm.riscv.vmsif.mask.nxv32i1(
 define @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv32i1_nxv32i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
@@ -283,6 +289,7 @@ declare @llvm.riscv.vmsif.mask.nxv64i1(
 define @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv64i1_nxv64i1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
index bc98b31957b255..c849b41461eb4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i8(
 define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i8(
 define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i8(
 define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmsle.mask.nxv8i8(
 define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmsle.mask.nxv16i8(
 define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v10
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmsle.mask.nxv32i8(
 define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v12
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i16(
 define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i16(
 define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i16(
 define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmsle.mask.nxv8i16(
 define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v10
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmsle.mask.nxv16i16(
 define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v12
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i32(
 define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i32(
 define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -697,6 +710,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i32(
 define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v10
@@ -748,6 +762,7 @@ declare @llvm.riscv.vmsle.mask.nxv8i32(
 define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v12
@@ -799,6 +814,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i64(
 define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v9
@@ -850,6 +866,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i64(
 define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v10
@@ -901,6 +918,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i64(
 define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsle.vv v0, v8, v12
@@ -952,6 +970,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i8.i8(
 define @intrinsic_vmsle_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
@@ -999,6 +1018,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i8.i8(
 define @intrinsic_vmsle_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
@@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i8.i8(
 define @intrinsic_vmsle_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
@@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmsle.mask.nxv8i8.i8(
 define @intrinsic_vmsle_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
@@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmsle.mask.nxv16i8.i8(
 define @intrinsic_vmsle_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
@@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmsle.mask.nxv32i8.i8(
 define @intrinsic_vmsle_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
@@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i16.i16(
 define @intrinsic_vmsle_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
@@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i16.i16(
 define @intrinsic_vmsle_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
@@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i16.i16(
 define @intrinsic_vmsle_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
@@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmsle.mask.nxv8i16.i16(
 define @intrinsic_vmsle_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
@@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmsle.mask.nxv16i16.i16(
 define @intrinsic_vmsle_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
@@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmsle.mask.nxv1i32.i32(
 define @intrinsic_vmsle_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv1i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
@@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmsle.mask.nxv2i32.i32(
 define @intrinsic_vmsle_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv2i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
@@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmsle.mask.nxv4i32.i32(
 define @intrinsic_vmsle_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
@@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmsle.mask.nxv8i32.i32(
 define @intrinsic_vmsle_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
@@ -1684,6 +1717,7 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64(
 ;
 ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v10, v0
 ; RV64-NEXT: vmv1r.v v0, v9
 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
@@ -1758,6 +1792,7 @@ define @intrinsic_vmsle_mask_vx_nxv2i64_i64(
 ;
 ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v11, v0
 ; RV64-NEXT: vmv1r.v v0, v10
 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
@@ -1832,6 +1867,7 @@ define @intrinsic_vmsle_mask_vx_nxv4i64_i64(
 ;
 ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64:
 ; RV64: # %bb.0: # %entry
+; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; RV64-NEXT: vmv1r.v v13, v0
 ; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
@@ -1867,6 +1903,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
@@ -1902,6 +1939,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
@@ -1937,6 +1975,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
@@ -1972,6 +2011,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
@@ -2007,6 +2047,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
@@ -2042,6 +2083,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv32i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
@@ -2077,6 +2119,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
@@ -2112,6 +2155,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
@@ -2147,6 +2191,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
@@ -2182,6 +2227,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
@@ -2217,6 +2263,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i16_i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
@@ -2252,6 +2299,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
@@ -2287,6 +2335,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
@@ -2322,6 +2371,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
@@ -2357,6 +2407,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
@@ -2392,6 +2443,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv1i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
@@ -2427,6 +2479,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
@@ -2462,6 +2515,7 @@ entry:
 define @intrinsic_vmsle_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v13, v0
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index 731989cfe15d95..ccd70da73aabb5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -34,6 +34,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8(
 define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -85,6 +86,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8(
 define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -136,6 +138,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8(
 define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -187,6 +190,7 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8(
 define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -238,6 +242,7 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8(
 define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v10
@@ -289,6 +294,7 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8(
 define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v12
@@ -340,6 +346,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16(
 define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -391,6 +398,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16(
 define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -442,6 +450,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16(
 define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -493,6 +502,7 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16(
 define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v10
@@ -544,6 +554,7 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16(
 define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v12
@@ -595,6 +606,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32(
 define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -646,6 +658,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32(
 define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -697,6 +710,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32(
 define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v10
@@ -748,6 +762,7 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32(
 define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v12
@@ -799,6 +814,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i64(
 define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v11, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v9
@@ -850,6 +866,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i64(
 define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v14, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v10
@@ -901,6 +918,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i64(
 define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v20, v0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmsleu.vv v0, v8, v12
@@ -952,6 +970,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8.i8(
 define @intrinsic_vmsleu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
@@ -999,6 +1018,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8.i8(
 define @intrinsic_vmsleu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i8_i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT: vmv1r.v v10, v0
 ; CHECK-NEXT: vmv1r.v v0, v9
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
@@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8.i8( define @intrinsic_vmsleu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8.i8( define @intrinsic_vmsleu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8.i8( define @intrinsic_vmsleu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8.i8( define @intrinsic_vmsleu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16.i16( define @intrinsic_vmsleu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16.i16( define @intrinsic_vmsleu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16.i16( define @intrinsic_vmsleu_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16.i16( define @intrinsic_vmsleu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16.i16( define @intrinsic_vmsleu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32.i32( 
define @intrinsic_vmsleu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32.i32( define @intrinsic_vmsleu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32.i32( define @intrinsic_vmsleu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32.i32( define @intrinsic_vmsleu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1684,6 +1717,7 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1758,6 +1792,7 @@ define @intrinsic_vmsleu_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1832,6 +1867,7 @@ define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1867,6 +1903,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1902,6 +1939,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1937,6 +1975,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1972,6 +2011,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vmsleu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2007,6 +2047,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2042,6 +2083,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2077,6 +2119,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2112,6 +2155,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2147,6 +2191,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2182,6 +2227,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2217,6 +2263,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2252,6 +2299,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2287,6 +2335,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2322,6 +2371,7 @@ entry: define @intrinsic_vmsleu_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; 
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -2357,6 +2407,7 @@ entry:
 define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -2392,6 +2443,7 @@ entry:
 define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v10, v0
 ; CHECK-NEXT:    vmv1r.v v0, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -2427,6 +2479,7 @@ entry:
 define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -2462,6 +2515,7 @@ entry:
 define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v13, v0
 ; CHECK-NEXT:    vmv1r.v v0, v12
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
index 407f85b4f5996b..c60f1d4ce0d2e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
@@ -34,6 +34,7 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i8(
 define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v8, v9
@@ -85,6 +86,7 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i8(
 define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v8, v9
@@ -136,6 +138,7 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i8(
 define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v8, v9
@@ -187,6 +190,7 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i8(
 define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v11, v0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
 ; CHECK-NEXT:    vmslt.vv v0, v8, v9
@@ -238,6 +242,7 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8(
 define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv1r.v v14, v0
 ; CHECK-NEXT:
vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 @@ -289,6 +294,7 @@ declare @llvm.riscv.vmslt.mask.nxv32i8( define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 @@ -340,6 +346,7 @@ declare @llvm.riscv.vmslt.mask.nxv1i16( define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v9 @@ -391,6 +398,7 @@ declare @llvm.riscv.vmslt.mask.nxv2i16( define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v9 @@ -442,6 +450,7 @@ declare @llvm.riscv.vmslt.mask.nxv4i16( define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v9 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmslt.mask.nxv8i16( define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmslt.mask.nxv16i16( define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 @@ -595,6 +606,7 @@ declare @llvm.riscv.vmslt.mask.nxv1i32( define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v9 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmslt.mask.nxv2i32( define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v9 @@ -697,6 +710,7 @@ declare @llvm.riscv.vmslt.mask.nxv4i32( define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; 
CHECK-NEXT: vmslt.vv v0, v8, v10 @@ -748,6 +762,7 @@ declare @llvm.riscv.vmslt.mask.nxv8i32( define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmslt.mask.nxv1i64( define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v9 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmslt.mask.nxv2i64( define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 @@ -901,6 +918,7 @@ declare @llvm.riscv.vmslt.mask.nxv4i64( define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 @@ -952,6 +970,7 @@ declare @llvm.riscv.vmslt.mask.nxv1i8.i8( define @intrinsic_vmslt_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -999,6 +1018,7 @@ declare @llvm.riscv.vmslt.mask.nxv2i8.i8( define @intrinsic_vmslt_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmslt.mask.nxv4i8.i8( define @intrinsic_vmslt_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmslt.mask.nxv8i8.i8( define @intrinsic_vmslt_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmslt.mask.nxv16i8.i8( define @intrinsic_vmslt_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmslt.mask.nxv32i8.i8( 
define @intrinsic_vmslt_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmslt.mask.nxv1i16.i16( define @intrinsic_vmslt_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmslt.mask.nxv2i16.i16( define @intrinsic_vmslt_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmslt.mask.nxv4i16.i16( define @intrinsic_vmslt_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmslt.mask.nxv8i16.i16( define @intrinsic_vmslt_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmslt.mask.nxv16i16.i16( define @intrinsic_vmslt_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmslt.mask.nxv1i32.i32( define @intrinsic_vmslt_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmslt.mask.nxv2i32.i32( define @intrinsic_vmslt_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmslt.mask.nxv4i32.i32( define @intrinsic_vmslt_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmslt.mask.nxv8i32.i32( define @intrinsic_vmslt_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1684,6 +1717,7 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1758,6 +1792,7 @@ define @intrinsic_vmslt_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1832,6 +1867,7 @@ define @intrinsic_vmslt_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1867,6 +1903,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1902,6 +1939,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1937,6 +1975,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1972,6 +2011,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2007,6 +2047,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2042,6 +2083,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2077,6 +2119,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2112,6 +2155,7 @@ entry: define 
@intrinsic_vmslt_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2147,6 +2191,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2182,6 +2227,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2217,6 +2263,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2252,6 +2299,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2287,6 +2335,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2322,6 +2371,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -2357,6 +2407,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -2392,6 +2443,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -2427,6 +2479,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -2462,6 +2515,7 @@ entry: define @intrinsic_vmslt_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index e051b332018fd5..a9f4dc45a9aebd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -34,6 +34,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8( define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -85,6 +86,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8( define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -136,6 +138,7 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8( define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -187,6 +190,7 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8( define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -238,6 +242,7 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8( define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 @@ -289,6 +294,7 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8( define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 @@ -340,6 +346,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16( define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -391,6 +398,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16( define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -442,6 +450,7 @@ declare 
@llvm.riscv.vmsltu.mask.nxv4i16( define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16( define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16( define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 @@ -595,6 +606,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32( define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32( define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -697,6 +710,7 @@ declare @llvm.riscv.vmsltu.mask.nxv4i32( define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 @@ -748,6 +762,7 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32( define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i64( define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v9 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i64( define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 @@ -901,6 +918,7 @@ declare 
@llvm.riscv.vmsltu.mask.nxv4i64( define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 @@ -952,6 +970,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8.i8( define @intrinsic_vmsltu_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -999,6 +1018,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8.i8( define @intrinsic_vmsltu_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8.i8( define @intrinsic_vmsltu_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8.i8( define @intrinsic_vmsltu_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8.i8( define @intrinsic_vmsltu_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8.i8( define @intrinsic_vmsltu_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16.i16( define @intrinsic_vmsltu_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16.i16( define @intrinsic_vmsltu_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsltu.mask.nxv4i16.i16( define @intrinsic_vmsltu_mask_vx_nxv4i16_i16( 
%0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16.i16( define @intrinsic_vmsltu_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16.i16( define @intrinsic_vmsltu_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32.i32( define @intrinsic_vmsltu_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32.i32( define @intrinsic_vmsltu_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmsltu.mask.nxv4i32.i32( define @intrinsic_vmsltu_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32.i32( define @intrinsic_vmsltu_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1684,6 +1717,7 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1758,6 +1792,7 @@ define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1832,6 +1867,7 @@ define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1867,6 +1903,7 @@ entry: define 
@intrinsic_vmsltu_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1902,6 +1939,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1937,6 +1975,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1972,6 +2011,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2007,6 +2047,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2042,6 +2083,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2077,6 +2119,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2112,6 +2155,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2147,6 +2191,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2182,6 +2227,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2217,6 +2263,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2252,6 +2299,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2287,6 +2335,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2322,6 +2371,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -2357,6 +2407,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -2392,6 +2443,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -2427,6 +2479,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -2462,6 +2515,7 @@ entry: define @intrinsic_vmsltu_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index 1e21b847ed20d8..43f09561bf7526 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -34,6 +34,7 @@ declare @llvm.riscv.vmsne.mask.nxv1i8( define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -85,6 +86,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i8( define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmsne.vv v0, 
v8, v9 @@ -136,6 +138,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i8( define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -187,6 +190,7 @@ declare @llvm.riscv.vmsne.mask.nxv8i8( define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -238,6 +242,7 @@ declare @llvm.riscv.vmsne.mask.nxv16i8( define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 @@ -289,6 +294,7 @@ declare @llvm.riscv.vmsne.mask.nxv32i8( define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 @@ -340,6 +346,7 @@ declare @llvm.riscv.vmsne.mask.nxv1i16( define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -391,6 +398,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i16( define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -442,6 +450,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i16( define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -493,6 +502,7 @@ declare @llvm.riscv.vmsne.mask.nxv8i16( define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 @@ -544,6 +554,7 @@ declare @llvm.riscv.vmsne.mask.nxv16i16( define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 @@ -595,6 +606,7 @@ declare 
@llvm.riscv.vmsne.mask.nxv1i32( define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -646,6 +658,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i32( define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -697,6 +710,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i32( define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 @@ -748,6 +762,7 @@ declare @llvm.riscv.vmsne.mask.nxv8i32( define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 @@ -799,6 +814,7 @@ declare @llvm.riscv.vmsne.mask.nxv1i64( define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v9 @@ -850,6 +866,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i64( define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 @@ -901,6 +918,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i64( define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 @@ -952,6 +970,7 @@ declare @llvm.riscv.vmsne.mask.nxv1i8.i8( define @intrinsic_vmsne_mask_vx_nxv1i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu @@ -999,6 +1018,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i8.i8( define @intrinsic_vmsne_mask_vx_nxv2i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu @@ -1046,6 +1066,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i8.i8( define 
@intrinsic_vmsne_mask_vx_nxv4i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu @@ -1093,6 +1114,7 @@ declare @llvm.riscv.vmsne.mask.nxv8i8.i8( define @intrinsic_vmsne_mask_vx_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu @@ -1140,6 +1162,7 @@ declare @llvm.riscv.vmsne.mask.nxv16i8.i8( define @intrinsic_vmsne_mask_vx_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu @@ -1187,6 +1210,7 @@ declare @llvm.riscv.vmsne.mask.nxv32i8.i8( define @intrinsic_vmsne_mask_vx_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu @@ -1234,6 +1258,7 @@ declare @llvm.riscv.vmsne.mask.nxv1i16.i16( define @intrinsic_vmsne_mask_vx_nxv1i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -1281,6 +1306,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i16.i16( define @intrinsic_vmsne_mask_vx_nxv2i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu @@ -1328,6 +1354,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i16.i16( define @intrinsic_vmsne_mask_vx_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu @@ -1375,6 +1402,7 @@ declare @llvm.riscv.vmsne.mask.nxv8i16.i16( define @intrinsic_vmsne_mask_vx_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu @@ -1422,6 +1450,7 @@ declare @llvm.riscv.vmsne.mask.nxv16i16.i16( define @intrinsic_vmsne_mask_vx_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu @@ -1469,6 +1498,7 @@ declare @llvm.riscv.vmsne.mask.nxv1i32.i32( define @intrinsic_vmsne_mask_vx_nxv1i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsne_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -1516,6 +1546,7 @@ declare @llvm.riscv.vmsne.mask.nxv2i32.i32( define @intrinsic_vmsne_mask_vx_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu @@ -1563,6 +1594,7 @@ declare @llvm.riscv.vmsne.mask.nxv4i32.i32( define @intrinsic_vmsne_mask_vx_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu @@ -1610,6 +1642,7 @@ declare @llvm.riscv.vmsne.mask.nxv8i32.i32( define @intrinsic_vmsne_mask_vx_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu @@ -1684,6 +1717,7 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu @@ -1758,6 +1792,7 @@ define @intrinsic_vmsne_mask_vx_nxv2i64_i64( ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v11, v0 ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu @@ -1832,6 +1867,7 @@ define @intrinsic_vmsne_mask_vx_nxv4i64_i64( ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu @@ -1867,6 +1903,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv1i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -1902,6 +1939,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv2i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -1937,6 +1975,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv4i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -1972,6 +2011,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv8i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; 
CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -2007,6 +2047,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv16i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -2042,6 +2083,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv32i8_i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -2077,6 +2119,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv1i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -2112,6 +2155,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv2i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -2147,6 +2191,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv4i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -2182,6 +2227,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv8i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -2217,6 +2263,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv16i16_i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -2252,6 +2299,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv1i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -2287,6 +2335,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv2i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -2322,6 +2371,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv4i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -2357,6 +2407,7 @@ entry: define 
@intrinsic_vmsne_mask_vi_nxv8i32_i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -2392,6 +2443,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv1i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -2427,6 +2479,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv2i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -2462,6 +2515,7 @@ entry: define @intrinsic_vmsne_mask_vi_nxv4i64_i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll index b0a28e6e455b07..e80131315b5e79 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll @@ -31,6 +31,7 @@ declare @llvm.riscv.vmsof.mask.nxv1i1( define @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -73,6 +74,7 @@ declare @llvm.riscv.vmsof.mask.nxv2i1( define @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -115,6 +117,7 @@ declare @llvm.riscv.vmsof.mask.nxv4i1( define @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -157,6 +160,7 @@ declare @llvm.riscv.vmsof.mask.nxv8i1( define @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -199,6 +203,7 @@ declare @llvm.riscv.vmsof.mask.nxv16i1( define @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv16i1_nxv16i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -241,6 +246,7 @@ declare @llvm.riscv.vmsof.mask.nxv32i1( define @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1( %0, %1, %2, iXLen %3) nounwind { 
; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv32i1_nxv32i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -283,6 +289,7 @@ declare @llvm.riscv.vmsof.mask.nxv64i1( define @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv64i1_nxv64i1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll index 7f248a39b54fa9..7107063693a08d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll @@ -49,6 +49,7 @@ define @vadd_same_passthru( %passthru, @unfoldable_diff_avl_unknown( %passthru, %a, %b, iXLen %vl1, iXLen %vl2) { ; CHECK-LABEL: unfoldable_diff_avl_unknown: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v14, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma ; CHECK-NEXT: vadd.vv v14, v10, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll index f7ca65801dc874..fa2f750c6000c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll @@ -5,6 +5,7 @@ define iXLen @bool_vec( %src, %m, i32 %evl) { ; RV32-LABEL: bool_vec: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -17,6 +18,7 @@ define iXLen @bool_vec( %src, %m, i32 %evl) { ; ; RV64-LABEL: bool_vec: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v9, v0 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 @@ -35,6 +37,7 @@ define iXLen @bool_vec( %src, %m, i32 %evl) { define iXLen @bool_vec_zero_poison( %src, %m, i32 %evl) { ; RV32-LABEL: bool_vec_zero_poison: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetvli zero, a0, e8, mf4, ta, ma @@ -43,6 +46,7 @@ define iXLen @bool_vec_zero_poison( %src, %m, ; ; RV64-LABEL: bool_vec_zero_poison: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; RV64-NEXT: vmv1r.v v9, v0 ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: srli a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-select.ll b/llvm/test/CodeGen/RISCV/rvv/vp-select.ll index c8a048971a803d..5aaf33a4c4406e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-select.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-select.ll @@ -12,6 +12,7 @@ define @all_ones( %true, define @all_zeroes( %true, %false, i32 %evl) { ; CHECK-LABEL: all_zeroes: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret %v = call @llvm.vp.select.nxv1i64( splat (i1 false), %true, %false, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index 2a137099bcb0f4..a03a12871fc470 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -10,6 +10,7 @@ declare <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1>, <16 x i1>, i32, define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 
x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -34,6 +35,7 @@ define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %ev define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -58,6 +60,7 @@ define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -83,6 +86,7 @@ define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -107,6 +111,7 @@ define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %ev define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v4i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -131,6 +136,7 @@ define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v4i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -156,6 +162,7 @@ define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -180,6 +187,7 @@ define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %ev define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v8i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -204,6 +212,7 @@ define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: 
test_vp_splice_v8i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -229,6 +238,7 @@ define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -253,6 +263,7 @@ define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v16i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -277,6 +288,7 @@ define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v16i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index fc446d0a3a88ac..39e77b124aee28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -13,6 +13,7 @@ declare @llvm.experimental.vp.splice.nxv64i1( @test_vp_splice_nxv1i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -37,6 +38,7 @@ define @test_vp_splice_nxv1i1( %va, @test_vp_splice_nxv1i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv1i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -61,6 +63,7 @@ define @test_vp_splice_nxv1i1_negative_offset( @test_vp_splice_nxv1i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv1i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -86,6 +89,7 @@ define @test_vp_splice_nxv1i1_masked( %va, @test_vp_splice_nxv2i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -110,6 +114,7 @@ define @test_vp_splice_nxv2i1( %va, @test_vp_splice_nxv2i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: 
vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -134,6 +139,7 @@ define @test_vp_splice_nxv2i1_negative_offset( @test_vp_splice_nxv2i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -159,6 +165,7 @@ define @test_vp_splice_nxv2i1_masked( %va, @test_vp_splice_nxv4i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -183,6 +190,7 @@ define @test_vp_splice_nxv4i1( %va, @test_vp_splice_nxv4i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv4i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -207,6 +215,7 @@ define @test_vp_splice_nxv4i1_negative_offset( @test_vp_splice_nxv4i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv4i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -232,6 +241,7 @@ define @test_vp_splice_nxv4i1_masked( %va, @test_vp_splice_nxv8i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -256,6 +266,7 @@ define @test_vp_splice_nxv8i1( %va, @test_vp_splice_nxv8i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv8i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -280,6 +291,7 @@ define @test_vp_splice_nxv8i1_negative_offset( @test_vp_splice_nxv8i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv8i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -305,6 +317,7 @@ define @test_vp_splice_nxv8i1_masked( %va, @test_vp_splice_nxv16i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -329,6 +342,7 @@ define @test_vp_splice_nxv16i1( %va, @test_vp_splice_nxv16i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv16i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -353,6 +367,7 @@ define @test_vp_splice_nxv16i1_negative_offset( @test_vp_splice_nxv16i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv16i1_masked: ; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -379,6 +394,7 @@ define @test_vp_splice_nxv16i1_masked( %va, define @test_vp_splice_nxv32i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv32i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -403,6 +419,7 @@ define @test_vp_splice_nxv32i1( %va, @test_vp_splice_nxv32i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv32i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -427,6 +444,7 @@ define @test_vp_splice_nxv32i1_negative_offset( @test_vp_splice_nxv32i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv32i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -453,6 +471,7 @@ define @test_vp_splice_nxv32i1_masked( %va, define @test_vp_splice_nxv64i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv64i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -477,6 +496,7 @@ define @test_vp_splice_nxv64i1( %va, @test_vp_splice_nxv64i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv64i1_negative_offset: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -501,6 +521,7 @@ define @test_vp_splice_nxv64i1_negative_offset( @test_vp_splice_nxv64i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv64i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index 3e423c8ec99030..ca52ce6e2c4a1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -258,12 +258,12 @@ declare @llvm.vp.gather.nxv32i8.nxv32p0(, define @vpgather_baseidx_nxv32i8(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv32i8: ; RV32: # %bb.0: +; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a2, a3, 1 ; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: sub a4, a1, a2 -; RV32-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a3 ; RV32-NEXT: sltu a3, a1, a4 ; RV32-NEXT: addi a3, a3, -1 @@ -285,12 +285,12 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @llvm.vp.gather.nxv16f64.nxv16p0( @vpgather_nxv16f64( %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: srli a3, a1, 3 -; RV32-NEXT: vsetvli a4, zero, 
e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vx v0, v0, a3 ; RV32-NEXT: sltu a3, a0, a2 ; RV32-NEXT: addi a3, a3, -1 @@ -2480,11 +2480,11 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: vsll.vi v24, v16, 3 @@ -2531,8 +2531,8 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: vsll.vi v24, v16, 3 @@ -2589,8 +2589,8 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: vsll.vi v16, v16, 3 @@ -2623,8 +2623,8 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vzext.vf2 v16, v8 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: vsll.vi v24, v16, 3 @@ -2648,8 +2648,8 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: vsll.vi v24, v16, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index bd7ea6c19d0b30..74f642829e0d09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -522,12 +522,12 @@ declare @llvm.vp.load.nxv16f64.p0(ptr, define @vpload_nxv16f64(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: sub a3, a1, a2 ; CHECK-NEXT: slli a4, a2, 3 ; CHECK-NEXT: srli a5, a2, 3 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-NEXT: sltu a5, a1, a3 ; CHECK-NEXT: addi a5, a5, -1 @@ -561,6 +561,7 @@ declare @llvm.vector.extract.nxv16f64( @vpload_nxv17f64(ptr %ptr, ptr %out, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv17f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a5, a3, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index f029d0b1b01bc0..88a8ebcc90054b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -361,12 +361,12 @@ define @vpmerge_vv_nxv128i8( %va, @vpmerge_vv_nxv128i8( %va, 
@vpmerge_vx_nxv128i8(i8 %a, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -429,8 +429,8 @@ define @vpmerge_vx_nxv128i8(i8 %a, %vb, define @vpmerge_vi_nxv128i8( %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 8978dc268d4e52..35f922734c57a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -468,6 +468,7 @@ define void @vpstore_nxv17f64( %val, ptr %ptr, , , i define zeroext i1 @vpreduce_or_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -39,6 +40,7 @@ declare i1 @llvm.vp.reduce.xor.nxv1i1(i1, , , define zeroext i1 @vpreduce_xor_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -71,6 +73,7 @@ declare i1 @llvm.vp.reduce.or.nxv2i1(i1, , , i define zeroext i1 @vpreduce_or_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -87,6 +90,7 @@ declare i1 @llvm.vp.reduce.xor.nxv2i1(i1, , , define zeroext i1 @vpreduce_xor_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -119,6 +123,7 @@ declare i1 @llvm.vp.reduce.or.nxv4i1(i1, , , i define zeroext i1 @vpreduce_or_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -135,6 +140,7 @@ declare i1 @llvm.vp.reduce.xor.nxv4i1(i1, , , define zeroext i1 @vpreduce_xor_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -167,6 +173,7 @@ declare i1 @llvm.vp.reduce.or.nxv8i1(i1, , , i define zeroext i1 @vpreduce_or_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -183,6 +190,7 @@ declare i1 @llvm.vp.reduce.xor.nxv8i1(i1, , , define zeroext i1 @vpreduce_xor_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vpreduce_xor_nxv8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -215,6 +223,7 @@ declare i1 @llvm.vp.reduce.or.nxv16i1(i1, , define zeroext i1 @vpreduce_or_nxv16i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -231,6 +240,7 @@ declare i1 @llvm.vp.reduce.xor.nxv16i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -263,6 +273,7 @@ declare i1 @llvm.vp.reduce.or.nxv32i1(i1, , define zeroext i1 @vpreduce_or_nxv32i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv32i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -279,6 +290,7 @@ declare i1 @llvm.vp.reduce.xor.nxv32i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv32i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -295,6 +307,7 @@ declare i1 @llvm.vp.reduce.or.nxv40i1(i1, , define zeroext i1 @vpreduce_or_nxv40i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv40i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -327,6 +340,7 @@ declare i1 @llvm.vp.reduce.or.nxv64i1(i1, , define zeroext i1 @vpreduce_or_nxv64i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv64i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -343,6 +357,7 @@ declare i1 @llvm.vp.reduce.xor.nxv64i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_nxv64i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -359,6 +374,7 @@ declare i1 @llvm.vp.reduce.or.nxv128i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_or_nxv128i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 @@ -390,6 +406,7 @@ declare i1 @llvm.vp.reduce.add.nxv1i1(i1, , , define zeroext i1 @vpreduce_add_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -406,6 +423,7 @@ declare i1 @llvm.vp.reduce.add.nxv2i1(i1, , , define zeroext i1 @vpreduce_add_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -422,6 +440,7 @@ declare i1 
@llvm.vp.reduce.add.nxv4i1(i1, , , define zeroext i1 @vpreduce_add_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -438,6 +457,7 @@ declare i1 @llvm.vp.reduce.add.nxv8i1(i1, , , define zeroext i1 @vpreduce_add_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -454,6 +474,7 @@ declare i1 @llvm.vp.reduce.add.nxv16i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -470,6 +491,7 @@ declare i1 @llvm.vp.reduce.add.nxv32i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv32i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -486,6 +508,7 @@ declare i1 @llvm.vp.reduce.add.nxv64i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_add_nxv64i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -615,6 +638,7 @@ declare i1 @llvm.vp.reduce.smin.nxv1i1(i1, , , define zeroext i1 @vpreduce_smin_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -631,6 +655,7 @@ declare i1 @llvm.vp.reduce.smin.nxv2i1(i1, , , define zeroext i1 @vpreduce_smin_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -647,6 +672,7 @@ declare i1 @llvm.vp.reduce.smin.nxv4i1(i1, , , define zeroext i1 @vpreduce_smin_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -663,6 +689,7 @@ declare i1 @llvm.vp.reduce.smin.nxv8i1(i1, , , define zeroext i1 @vpreduce_smin_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -679,6 +706,7 @@ declare i1 @llvm.vp.reduce.smin.nxv16i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -695,6 +723,7 @@ declare i1 @llvm.vp.reduce.smin.nxv32i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv32i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; 
CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -711,6 +740,7 @@ declare i1 @llvm.vp.reduce.smin.nxv64i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_smin_nxv64i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma @@ -727,6 +757,7 @@ declare i1 @llvm.vp.reduce.umax.nxv1i1(i1, , , define zeroext i1 @vpreduce_umax_nxv1i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv1i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma @@ -743,6 +774,7 @@ declare i1 @llvm.vp.reduce.umax.nxv2i1(i1, , , define zeroext i1 @vpreduce_umax_nxv2i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv2i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma @@ -759,6 +791,7 @@ declare i1 @llvm.vp.reduce.umax.nxv4i1(i1, , , define zeroext i1 @vpreduce_umax_nxv4i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv4i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma @@ -775,6 +808,7 @@ declare i1 @llvm.vp.reduce.umax.nxv8i1(i1, , , define zeroext i1 @vpreduce_umax_nxv8i1(i1 zeroext %s, %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma @@ -791,6 +825,7 @@ declare i1 @llvm.vp.reduce.umax.nxv16i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv16i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma @@ -807,6 +842,7 @@ declare i1 @llvm.vp.reduce.umax.nxv32i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv32i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma @@ -823,6 +859,7 @@ declare i1 @llvm.vp.reduce.umax.nxv64i1(i1, , %v, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_umax_nxv64i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll index 1779fc12095e88..7b460f2c058f85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -22,8 +22,8 @@ define internal void @foo( %v15, %0, This Inner Loop Header: Depth=1 ; NOSUBREG-NEXT: vl1r.v v9, (zero) -; NOSUBREG-NEXT: vmv1r.v v13, v12 ; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; NOSUBREG-NEXT: vmv1r.v v13, v12 ; NOSUBREG-NEXT: vrgatherei16.vv v13, v9, v10 ; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; NOSUBREG-NEXT: vand.vv v9, v8, v13 @@ -42,8 +42,8 @@ define internal void @foo( %v15, %0, This Inner Loop Header: Depth=1 ; SUBREG-NEXT: vl1r.v v9, (zero) -; SUBREG-NEXT: vmv1r.v 
v13, v12 ; SUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; SUBREG-NEXT: vmv1r.v v13, v12 ; SUBREG-NEXT: vrgatherei16.vv v13, v9, v10 ; SUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; SUBREG-NEXT: vand.vv v9, v8, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll index 12c439346e3569..3421c6af334bc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -572,8 +572,8 @@ declare @llvm.vp.sadd.sat.nxv128i8(, @vsadd_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -1350,11 +1350,11 @@ declare @llvm.vp.sadd.sat.nxv32i32(, @vsadd_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vi_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index d962f703abfd22..180e0799044e8b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -571,8 +571,8 @@ declare @llvm.vp.uadd.sat.nxv128i8(, @vsaddu_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsaddu_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -1349,11 +1349,11 @@ declare @llvm.vp.uadd.sat.nxv32i32(, @vsaddu_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsaddu_vi_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll index a63d14e8b6c04e..b81986a13bd670 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll @@ -126,6 +126,7 @@ define @vmerge_truelhs_nxv8bf16_0( %v define @vmerge_falselhs_nxv8bf16_0( %va, %vb) { ; CHECK-LABEL: vmerge_falselhs_nxv8bf16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret %vc = select zeroinitializer, %va, %vb diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index 1fc33dc73a27dc..a2b4f308b6a323 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -175,6 +175,7 @@ define @vmerge_truelhs_nxv8f16_0( %va, @vmerge_falselhs_nxv8f16_0( %va, %vb) { ; CHECK-LABEL: vmerge_falselhs_nxv8f16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret %vc = select zeroinitializer, %va, %vb diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll index 
9cafa28eb429f1..8179374cb8c7f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll @@ -803,6 +803,7 @@ define @vmerge_truelhs_nxv8i64_0( %va, @vmerge_falselhs_nxv8i64_0( %va, %vb) { ; CHECK-LABEL: vmerge_falselhs_nxv8i64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret %vc = select zeroinitializer, %va, %vb diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index bb51f0592dc17a..a7c521cd464369 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -362,6 +362,7 @@ define @select_nxv32i32( %a, @select_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_nxv16f64( %a, @select_nxv16f64( %a, @select_cond_x_cond( %x, @select_undef_T_F( %x, %y, i32 zeroext %evl) { ; CHECK-LABEL: select_undef_T_F: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %a = call @llvm.vp.select.nxv2i1( poison, %x, %y, i32 %evl) @@ -852,6 +853,7 @@ define @select_undef_undef_F( %x, i32 zeroext define @select_unknown_undef_F( %x, %y, i32 zeroext %evl) { ; CHECK-LABEL: select_unknown_undef_F: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %a = call @llvm.vp.select.nxv2i1( %x, undef, %y, i32 %evl) @@ -861,6 +863,7 @@ define @select_unknown_undef_F( %x, @select_unknown_T_undef( %x, %y, i32 zeroext %evl) { ; CHECK-LABEL: select_unknown_T_undef: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %a = call @llvm.vp.select.nxv2i1( %x, %y, poison, i32 %evl) @@ -870,6 +873,7 @@ define @select_unknown_T_undef( %x, @select_false_T_F( %x, %y, %z, i32 zeroext %evl) { ; CHECK-LABEL: select_false_T_F: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: ret %a = call @llvm.vp.select.nxv2i1( zeroinitializer, %y, %z, i32 %evl) @@ -879,6 +883,7 @@ define @select_false_T_F( %x, @select_unknown_T_T( %x, %y, i32 zeroext %evl) { ; CHECK-LABEL: select_unknown_T_T: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %a = call @llvm.vp.select.nxv2i1( %x, %y, %y, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-O0.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-O0.ll index 33acfb7dceb949..8186c67bf93e07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-O0.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-O0.ll @@ -18,11 +18,11 @@ declare @llvm.riscv.vle.mask.nxv1i64( define <2 x double> @fixed_length(<2 x double> %a, <2 x double> %b) nounwind { ; CHECK-LABEL: fixed_length: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: # kill: def $v11 killed $v10 ; CHECK-NEXT: # kill: def $v9 killed $v8 ; CHECK-NEXT: # implicit-def: $v9 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v8, v10 ; CHECK-NEXT: # implicit-def: $v8 ; CHECK-NEXT: vfadd.vv v8, v9, v10 @@ -36,9 +36,9 @@ entry: define @scalable( %a, %b) nounwind { ; CHECK-LABEL: scalable: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: # implicit-def: $v9 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v9, v8, v10 ; CHECK-NEXT: # implicit-def: $v8 ; 
CHECK-NEXT: vfadd.vv v8, v9, v10 @@ -53,8 +53,8 @@ entry: define @intrinsic_same_vlmax( %a, %b) nounwind { ; CHECK-LABEL: intrinsic_same_vlmax: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: # implicit-def: $v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma ; CHECK-NEXT: vfadd.vv v9, v8, v10 @@ -81,8 +81,8 @@ entry: define @intrinsic_same_avl_imm( %a, %b) nounwind { ; CHECK-LABEL: intrinsic_same_avl_imm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vsetivli a0, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: # implicit-def: $v9 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma ; CHECK-NEXT: vfadd.vv v9, v8, v10 @@ -108,6 +108,7 @@ entry: define @intrinsic_same_avl_reg(i64 %avl, %a, %b) nounwind { ; CHECK-LABEL: intrinsic_same_avl_reg: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma ; CHECK-NEXT: # implicit-def: $v9 @@ -135,6 +136,7 @@ entry: define @intrinsic_diff_avl_reg(i64 %avl, i64 %avl2, %a, %b) nounwind { ; CHECK-LABEL: intrinsic_diff_avl_reg: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma ; CHECK-NEXT: # implicit-def: $v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index b0cb6bc6125ddf..2ca2803f3c746a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -377,8 +377,8 @@ entry: define @test19( %a, double %b) nounwind { ; CHECK-LABEL: test19: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 2, e64, m8, tu, ma ; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v9, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll index d3b905ef897b1b..3c91131fe4d121 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll @@ -151,11 +151,11 @@ declare @llvm.vp.sext.nxv32i32.nxv32i8(, < define @vsext_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vsext_nxv32i8_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll index 581cc666b6cbd5..44d3ee96f5e61d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -508,11 +508,11 @@ declare @llvm.vp.sitofp.nxv32f16.nxv32i32( @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFH-NEXT: vmv1r.v v24, v0 ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: slli a1, a1, 1 -; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; ZVFH-NEXT: vslidedown.vx v0, v0, a2 ; ZVFH-NEXT: sub a2, a0, a1 ; ZVFH-NEXT: sltu a3, a0, a2 @@ -532,11 +532,11 @@ define @vsitofp_nxv32f16_nxv32i32( %va, ; ; ZVFHMIN-LABEL: 
vsitofp_nxv32f16_nxv32i32: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: srli a2, a1, 2 ; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: sltu a3, a0, a2 @@ -566,11 +566,11 @@ declare @llvm.vp.sitofp.nxv32f32.nxv32i32( @vsitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index f9c24eeec31c56..7ee6ea9e19df02 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -590,8 +590,8 @@ declare @llvm.vp.ssub.sat.nxv128i8(, @vssub_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -1392,11 +1392,11 @@ declare @llvm.vp.ssub.sat.nxv32i32(, @vssub_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vi_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index 04a1b522a8a33a..7674a457ca9617 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -588,8 +588,8 @@ declare @llvm.vp.usub.sat.nxv128i8(, @vssubu_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -1390,11 +1390,11 @@ declare @llvm.vp.usub.sat.nxv32i32(, @vssubu_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vi_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index e62b7a00396388..fd5bf4ebcede82 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -157,11 +157,11 @@ declare @llvm.vp.trunc.nxv15i16.nxv15i64( define @vtrunc_nxv15i16_nxv15i64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 
3 ; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 @@ -214,11 +214,11 @@ declare @llvm.vp.trunc.nxv32i7.nxv32i32(, define @vtrunc_nxv32i7_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -248,11 +248,11 @@ declare @llvm.vp.trunc.nxv32i8.nxv32i32(, define @vtrunc_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_nxv32i8_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -288,6 +288,7 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @llvm.vp.uitofp.nxv32f16.nxv32i32( @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFH-NEXT: vmv1r.v v24, v0 ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: slli a1, a1, 1 -; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; ZVFH-NEXT: vslidedown.vx v0, v0, a2 ; ZVFH-NEXT: sub a2, a0, a1 ; ZVFH-NEXT: sltu a3, a0, a2 @@ -524,11 +524,11 @@ define @vuitofp_nxv32f16_nxv32i32( %va, ; ; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: srli a2, a1, 2 ; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: sltu a3, a0, a2 @@ -558,11 +558,11 @@ declare @llvm.vp.uitofp.nxv32f32.nxv32i32( @vuitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll index 10e655c8445409..934d7eb43ac2ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll @@ -151,11 +151,11 @@ declare @llvm.vp.zext.nxv32i32.nxv32i8(, < define @vzext_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vzext_nxv32i8_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2