diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 91bccc77f93fd..3b07a7acf10a5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2582,11 +2582,12 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
 
 static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
                        SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
-  // If we know the exact VLEN, our VL is exactly equal to VLMAX, and
-  // we can't encode the AVL as an immediate, use the VLMAX encoding.
+  // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
+  // canonicalize the representation. InsertVSETVLI will pick the immediate
+  // encoding later if profitable.
   const auto [MinVLMAX, MaxVLMAX] =
       RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
-  if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
+  if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
     return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
 
   return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index b2d36b362b3a0..1aa35f1c644a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -800,7 +800,18 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
   return NewInfo;
 }
 
+static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
+                             RISCVII::VLMUL VLMul) {
+  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
+  if (Fractional)
+    VLEN = VLEN / LMul;
+  else
+    VLEN = VLEN * LMul;
+  return VLEN/SEW;
+}
+
 static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
+                                       const RISCVSubtarget &ST,
                                        const MachineRegisterInfo *MRI) {
   VSETVLIInfo InstrInfo;
 
@@ -842,8 +853,15 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
     if (VLOp.isImm()) {
       int64_t Imm = VLOp.getImm();
       // Conver the VLMax sentintel to X0 register.
-      if (Imm == RISCV::VLMaxSentinel)
-        InstrInfo.setAVLReg(RISCV::X0);
+      if (Imm == RISCV::VLMaxSentinel) {
+        // If we know the exact VLEN, see if we can use the constant encoding
+        // for the VLMAX instead. This reduces register pressure slightly.
+        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
+        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
+          InstrInfo.setAVLImm(VLMAX);
+        else
+          InstrInfo.setAVLReg(RISCV::X0);
+      }
       else
         InstrInfo.setAVLImm(Imm);
     } else {
@@ -979,7 +997,7 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
 bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                      const VSETVLIInfo &Require,
                                      const VSETVLIInfo &CurInfo) const {
-  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));
+  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
 
   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
     return true;
@@ -1067,7 +1085,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
   if (!RISCVII::hasSEWOp(TSFlags))
     return;
 
-  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
   assert(NewInfo.isValid() && !NewInfo.isUnknown());
   if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
     return;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index 0b9db09aab3a9..69d71a1d0c5a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -593,25 +593,45 @@ define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, ptr %y) {
 }
 
 define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, ptr %y) {
-; CHECK-LABEL: extract_v2i1_nxv2i1_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmsne.vi v0, v8, 0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
-; CHECK-NEXT:    vmv.v.v v9, v8
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
-; CHECK-NEXT:    ret
+; CHECK-V-LABEL: extract_v2i1_nxv2i1_2:
+; CHECK-V:       # %bb.0:
+; CHECK-V-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-V-NEXT:    vmv.v.i v8, 0
+; CHECK-V-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-V-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-V-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-V-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-V-NEXT:    vmv.v.i v8, 0
+; CHECK-V-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-V-NEXT:    vmv.v.i v9, 0
+; CHECK-V-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
+; CHECK-V-NEXT:    vmv.v.v v9, v8
+; CHECK-V-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-V-NEXT:    vmsne.vi v8, v9, 0
+; CHECK-V-NEXT:    vsm.v v8, (a0)
+; CHECK-V-NEXT:    ret
+;
+; CHECK-KNOWNVLEN128-LABEL: extract_v2i1_nxv2i1_2:
+; CHECK-KNOWNVLEN128:       # %bb.0:
+; CHECK-KNOWNVLEN128-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-KNOWNVLEN128-NEXT:    vmv.v.i v8, 0
+; CHECK-KNOWNVLEN128-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-KNOWNVLEN128-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-KNOWNVLEN128-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-KNOWNVLEN128-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-KNOWNVLEN128-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-KNOWNVLEN128-NEXT:    vmv.v.i v8, 0
+; CHECK-KNOWNVLEN128-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-KNOWNVLEN128-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-KNOWNVLEN128-NEXT:    vmv.v.i v9, 0
+; CHECK-KNOWNVLEN128-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
+; CHECK-KNOWNVLEN128-NEXT:    vmv.v.v v9, v8
+; CHECK-KNOWNVLEN128-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-KNOWNVLEN128-NEXT:    vmsne.vi v8, v9, 0
+; CHECK-KNOWNVLEN128-NEXT:    vsm.v v8, (a0)
+; CHECK-KNOWNVLEN128-NEXT:    ret
   %c = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
   store <2 x i1> %c, ptr %y
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store.ll
index 3bb465ba998a2..4d72bc9af1903 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-add-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store.ll
@@ -362,7 +362,7 @@ define void @exact_vlen_vadd_vint8m1(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1r.v v8, (a1)
 ; CHECK-NEXT:    vl1r.v v9, (a2)
-; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -392,7 +392,7 @@ define void @exact_vlen_vadd_vint8m2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_
 define void @exact_vlen_vadd_vint8mf2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_range(2,2) {
 ; CHECK-LABEL: exact_vlen_vadd_vint8mf2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
 ; CHECK-NEXT:    vle8.v v9, (a2)
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
@@ -408,7 +408,7 @@ define void @exact_vlen_vadd_vint8mf2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
 define void @exact_vlen_vadd_vint8mf4(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_range(2,2) {
 ; CHECK-LABEL: exact_vlen_vadd_vint8mf4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
 ; CHECK-NEXT:    vle8.v v9, (a2)
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
@@ -424,7 +424,7 @@ define void @exact_vlen_vadd_vint8mf4(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
 define void @exact_vlen_vadd_vint8mf8(ptr %pc, ptr %pa, ptr %pb) nounwind vscale_range(2,2) {
 ; CHECK-LABEL: exact_vlen_vadd_vint8mf8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a1)
 ; CHECK-NEXT:    vle8.v v9, (a2)
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
@@ -442,7 +442,7 @@ define void @exact_vlen_vadd_vint32m1(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re32.v v8, (a1)
 ; CHECK-NEXT:    vl1re32.v v9, (a2)
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -458,7 +458,7 @@ define void @exact_vlen_vadd_vint32m2(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a1)
 ; CHECK-NEXT:    vl2re32.v v10, (a2)
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -474,7 +474,7 @@ define void @exact_vlen_vadd_vint32m4(ptr %pc, ptr %pa, ptr %pb) nounwind vscale
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re32.v v8, (a1)
 ; CHECK-NEXT:    vl4re32.v v12, (a2)
-; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vs4r.v v8, (a0)
 ; CHECK-NEXT:    ret