diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 203bdffc49ae18..78d581c8ceadfd 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3281,14 +3281,16 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, // Avoid setting up the register pressure tracker for small regions to save // compile time. As a rough heuristic, only track pressure when the number of - // schedulable instructions exceeds half the integer register file. + // schedulable instructions exceeds half the allocatable integer register file + // of the largest legal integer register type. RegionPolicy.ShouldTrackPressure = true; - for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { + for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) { MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; if (TLI->isTypeLegal(LegalIntVT)) { unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( TLI->getRegClassFor(LegalIntVT)); RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + break; } } diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll index bf48c0df3e4961..7cde034726e0b5 100644 --- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll @@ -4,12 +4,12 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; LA64-LABEL: atomicrmw_uinc_wrap_i8: ; LA64: # %bb.0: -; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: slli.d $a4, $a0, 3 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 -; LA64-NEXT: ori $a3, $zero, 255 -; LA64-NEXT: sll.w $a4, $a3, $a2 +; LA64-NEXT: andi $a2, $a4, 24 +; LA64-NEXT: ori $a5, $zero, 255 ; LA64-NEXT: ld.w $a3, $a0, 0 -; LA64-NEXT: andi $a2, $a2, 24 +; LA64-NEXT: sll.w $a4, $a5, $a4 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: andi $a1, $a1, 255 ; LA64-NEXT: .p2align 4, , 16 @@ -54,13 +54,13 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; LA64-LABEL: atomicrmw_uinc_wrap_i16: ; LA64: # %bb.0: -; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: slli.d $a4, $a0, 3 ; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: andi $a2, $a4, 24 ; LA64-NEXT: lu12i.w $a3, 15 -; LA64-NEXT: ori $a3, $a3, 4095 -; LA64-NEXT: sll.w $a4, $a3, $a2 +; LA64-NEXT: ori $a5, $a3, 4095 ; LA64-NEXT: ld.w $a3, $a0, 0 -; LA64-NEXT: andi $a2, $a2, 24 +; LA64-NEXT: sll.w $a4, $a5, $a4 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 ; LA64-NEXT: .p2align 4, , 16 diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll index d03af114bceefe..18d17751a77196 100644 --- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll +++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll @@ -124,10 +124,10 @@ define void @test_f2(ptr %P, ptr %S) nounwind { ; LA64F: # %bb.0: ; LA64F-NEXT: fld.s $fa0, $a0, 4 ; LA64F-NEXT: fld.s $fa1, $a0, 0 -; LA64F-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64F-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) -; LA64F-NEXT: fld.s $fa2, $a0, 0 ; LA64F-NEXT: addi.w $a0, $zero, 1 +; LA64F-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; LA64F-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI1_0) +; LA64F-NEXT: fld.s $fa2, $a2, 0 ; LA64F-NEXT: movgr2fr.w $fa3, $a0 ; LA64F-NEXT: ffint.s.w $fa3, $fa3 ; LA64F-NEXT: fadd.s $fa1, $fa1, $fa3 @@ -140,10 +140,10 @@ define void @test_f2(ptr %P, ptr %S) nounwind { ; LA64D: # %bb.0: ; LA64D-NEXT: fld.s 
$fa0, $a0, 4 ; LA64D-NEXT: fld.s $fa1, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) -; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) -; LA64D-NEXT: fld.s $fa2, $a0, 0 ; LA64D-NEXT: addi.w $a0, $zero, 1 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI1_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI1_0) +; LA64D-NEXT: fld.s $fa2, $a2, 0 ; LA64D-NEXT: movgr2fr.w $fa3, $a0 ; LA64D-NEXT: ffint.s.w $fa3, $fa3 ; LA64D-NEXT: fadd.s $fa1, $fa1, $fa3 @@ -527,10 +527,10 @@ define void @test_d2(ptr %P, ptr %S) nounwind { ; LA64D: # %bb.0: ; LA64D-NEXT: fld.d $fa0, $a0, 8 ; LA64D-NEXT: fld.d $fa1, $a0, 0 -; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) -; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0) -; LA64D-NEXT: fld.d $fa2, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 +; LA64D-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0) +; LA64D-NEXT: addi.d $a2, $a2, %pc_lo12(.LCPI4_0) +; LA64D-NEXT: fld.d $fa2, $a2, 0 ; LA64D-NEXT: movgr2fr.d $fa3, $a0 ; LA64D-NEXT: ffint.d.l $fa3, $fa3 ; LA64D-NEXT: fadd.d $fa1, $fa1, $fa3 diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 5914e45a153302..f96e1bad2e3895 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -127,11 +127,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_uinc_wrap_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a4, a3, a0 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a0, a4, 24 +; RV64IA-NEXT: li a5, 255 ; RV64IA-NEXT: lw a3, 0(a2) -; RV64IA-NEXT: andi a0, a0, 24 +; RV64IA-NEXT: sllw a4, a5, a4 ; RV64IA-NEXT: not a4, a4 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index d5041c2a7ca78b..9e2b0b5c3cbb41 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -84,12 +84,12 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 ; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: neg a0, a0 ; CHECK64ZFBFMIN-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; CHECK64ZFBFMIN-NEXT: lui a1, 815104 ; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a1 ; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64ZFBFMIN-NEXT: neg a0, a0 ; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 @@ -187,10 +187,10 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; ; RV64ID-LABEL: fcvt_ui_bf16_sat: ; RV64ID: # %bb.0: # %start -; RV64ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) ; RV64ID-NEXT: fmv.x.w a0, fa0 ; RV64ID-NEXT: slli a0, a0, 16 +; RV64ID-NEXT: lui a1, %hi(.LCPI3_0) +; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a1) ; RV64ID-NEXT: fmv.w.x fa4, a0 ; RV64ID-NEXT: fmv.w.x fa3, zero ; RV64ID-NEXT: fmax.s fa4, fa4, fa3 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index d8471129433027..67123466354c41 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -140,11 +140,11 @@ define i64 @caller_large_scalars() nounwind { ; 
RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: sd a0, 32(sp) +; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: addi a0, sp, 32 ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: sd zero, 40(sp) +; RV64I-NEXT: sd a2, 32(sp) ; RV64I-NEXT: call callee_large_scalars ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 80 diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index da882cafd99715..c147d6ec6d9b15 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -1651,8 +1651,8 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV64IFD-NEXT: lui a0, %hi(.LCPI26_1) ; RV64IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a0) ; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 ; RV64IFD-NEXT: neg a0, a0 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 ; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz ; RV64IFD-NEXT: and a0, a0, a1 @@ -1680,12 +1680,12 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV64IZFINXZDINX-NEXT: ld a1, %lo(.LCPI26_0)(a1) ; RV64IZFINXZDINX-NEXT: lui a2, %hi(.LCPI26_1) ; RV64IZFINXZDINX-NEXT: ld a2, %lo(.LCPI26_1)(a2) -; RV64IZFINXZDINX-NEXT: fmax.d a1, a0, a1 -; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 -; RV64IZFINXZDINX-NEXT: neg a0, a0 -; RV64IZFINXZDINX-NEXT: fmin.d a1, a1, a2 -; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a1, rtz -; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0 +; RV64IZFINXZDINX-NEXT: neg a3, a3 +; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a1 +; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: fcvt.l.d a0, a0, rtz +; RV64IZFINXZDINX-NEXT: and a0, a3, a0 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -2026,8 +2026,8 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV64IFD-NEXT: lui a0, %hi(.LCPI30_1) ; RV64IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a0) ; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 ; RV64IFD-NEXT: neg a0, a0 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 ; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz ; RV64IFD-NEXT: and a0, a0, a1 @@ -2055,12 +2055,12 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV64IZFINXZDINX-NEXT: ld a1, %lo(.LCPI30_0)(a1) ; RV64IZFINXZDINX-NEXT: lui a2, %hi(.LCPI30_1) ; RV64IZFINXZDINX-NEXT: ld a2, %lo(.LCPI30_1)(a2) -; RV64IZFINXZDINX-NEXT: fmax.d a1, a0, a1 -; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 -; RV64IZFINXZDINX-NEXT: neg a0, a0 -; RV64IZFINXZDINX-NEXT: fmin.d a1, a1, a2 -; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a1, rtz -; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0 +; RV64IZFINXZDINX-NEXT: neg a3, a3 +; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a1 +; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: fcvt.l.d a0, a0, rtz +; RV64IZFINXZDINX-NEXT: and a0, a3, a0 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i8: diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index 2c7315fbe59f6f..653b64ec730496 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -1424,12 +1424,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64IF-LABEL: fcvt_w_s_sat_i16: ; RV64IF: # %bb.0: # %start ; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: neg a0, a0 ; RV64IF-NEXT: lui a1, %hi(.LCPI24_0) ; 
RV64IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1) ; RV64IF-NEXT: lui a1, 815104 ; RV64IF-NEXT: fmv.w.x fa4, a1 ; RV64IF-NEXT: fmax.s fa4, fa0, fa4 -; RV64IF-NEXT: neg a0, a0 ; RV64IF-NEXT: fmin.s fa5, fa4, fa5 ; RV64IF-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IF-NEXT: and a0, a0, a1 @@ -1450,15 +1450,15 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; ; RV64IZFINX-LABEL: fcvt_w_s_sat_i16: ; RV64IZFINX: # %bb.0: # %start -; RV64IZFINX-NEXT: lui a1, 815104 +; RV64IZFINX-NEXT: feq.s a1, a0, a0 ; RV64IZFINX-NEXT: lui a2, %hi(.LCPI24_0) ; RV64IZFINX-NEXT: lw a2, %lo(.LCPI24_0)(a2) -; RV64IZFINX-NEXT: fmax.s a1, a0, a1 -; RV64IZFINX-NEXT: feq.s a0, a0, a0 -; RV64IZFINX-NEXT: neg a0, a0 -; RV64IZFINX-NEXT: fmin.s a1, a1, a2 -; RV64IZFINX-NEXT: fcvt.l.s a1, a1, rtz -; RV64IZFINX-NEXT: and a0, a0, a1 +; RV64IZFINX-NEXT: neg a1, a1 +; RV64IZFINX-NEXT: lui a3, 815104 +; RV64IZFINX-NEXT: fmax.s a0, a0, a3 +; RV64IZFINX-NEXT: fmin.s a0, a0, a2 +; RV64IZFINX-NEXT: fcvt.l.s a0, a0, rtz +; RV64IZFINX-NEXT: and a0, a1, a0 ; RV64IZFINX-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 16c096290720d3..277749c75bbbf1 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -210,12 +210,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IZFH: # %bb.0: # %start ; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 ; RV64IZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IZFH-NEXT: neg a0, a0 ; RV64IZFH-NEXT: lui a1, %hi(.LCPI1_0) ; RV64IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64IZFH-NEXT: lui a1, 815104 ; RV64IZFH-NEXT: fmv.w.x fa3, a1 ; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IZFH-NEXT: neg a0, a0 ; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IZFH-NEXT: and a0, a0, a1 @@ -240,12 +240,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IDZFH: # %bb.0: # %start ; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 ; RV64IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IDZFH-NEXT: neg a0, a0 ; RV64IDZFH-NEXT: lui a1, %hi(.LCPI1_0) ; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64IDZFH-NEXT: lui a1, 815104 ; RV64IDZFH-NEXT: fmv.w.x fa3, a1 ; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IDZFH-NEXT: neg a0, a0 ; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IDZFH-NEXT: and a0, a0, a1 @@ -268,15 +268,15 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IZHINX-LABEL: fcvt_si_h_sat: ; RV64IZHINX: # %bb.0: # %start ; RV64IZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZHINX-NEXT: lui a1, 815104 +; RV64IZHINX-NEXT: feq.s a1, a0, a0 ; RV64IZHINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV64IZHINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) -; RV64IZHINX-NEXT: fmax.s a1, a0, a1 -; RV64IZHINX-NEXT: feq.s a0, a0, a0 -; RV64IZHINX-NEXT: neg a0, a0 -; RV64IZHINX-NEXT: fmin.s a1, a1, a2 -; RV64IZHINX-NEXT: fcvt.l.s a1, a1, rtz -; RV64IZHINX-NEXT: and a0, a0, a1 +; RV64IZHINX-NEXT: neg a1, a1 +; RV64IZHINX-NEXT: lui a3, 815104 +; RV64IZHINX-NEXT: fmax.s a0, a0, a3 +; RV64IZHINX-NEXT: fmin.s a0, a0, a2 +; RV64IZHINX-NEXT: fcvt.l.s a0, a0, rtz +; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; ; RV32IZDINXZHINX-LABEL: fcvt_si_h_sat: @@ -296,15 +296,15 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IZDINXZHINX-LABEL: fcvt_si_h_sat: ; RV64IZDINXZHINX: # %bb.0: # %start ; RV64IZDINXZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZDINXZHINX-NEXT: lui a1, 815104 +; RV64IZDINXZHINX-NEXT: feq.s a1, a0, a0 ; RV64IZDINXZHINX-NEXT: lui a2, %hi(.LCPI1_0) ; RV64IZDINXZHINX-NEXT: lw a2, 
%lo(.LCPI1_0)(a2) -; RV64IZDINXZHINX-NEXT: fmax.s a1, a0, a1 -; RV64IZDINXZHINX-NEXT: feq.s a0, a0, a0 -; RV64IZDINXZHINX-NEXT: neg a0, a0 -; RV64IZDINXZHINX-NEXT: fmin.s a1, a1, a2 -; RV64IZDINXZHINX-NEXT: fcvt.l.s a1, a1, rtz -; RV64IZDINXZHINX-NEXT: and a0, a0, a1 +; RV64IZDINXZHINX-NEXT: neg a1, a1 +; RV64IZDINXZHINX-NEXT: lui a3, 815104 +; RV64IZDINXZHINX-NEXT: fmax.s a0, a0, a3 +; RV64IZDINXZHINX-NEXT: fmin.s a0, a0, a2 +; RV64IZDINXZHINX-NEXT: fcvt.l.s a0, a0, rtz +; RV64IZDINXZHINX-NEXT: and a0, a1, a0 ; RV64IZDINXZHINX-NEXT: ret ; ; RV32I-LABEL: fcvt_si_h_sat: @@ -420,12 +420,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64ID-LP64-NEXT: call __extendhfsf2 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 ; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5 +; RV64ID-LP64-NEXT: neg a0, a0 ; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI1_0) ; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64ID-LP64-NEXT: lui a1, 815104 ; RV64ID-LP64-NEXT: fmv.w.x fa3, a1 ; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3 -; RV64ID-LP64-NEXT: neg a0, a0 ; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-LP64-NEXT: and a0, a0, a1 @@ -458,12 +458,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 ; RV64ID-NEXT: feq.s a0, fa0, fa0 +; RV64ID-NEXT: neg a0, a0 ; RV64ID-NEXT: lui a1, %hi(.LCPI1_0) ; RV64ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1) ; RV64ID-NEXT: lui a1, 815104 ; RV64ID-NEXT: fmv.w.x fa4, a1 ; RV64ID-NEXT: fmax.s fa4, fa0, fa4 -; RV64ID-NEXT: neg a0, a0 ; RV64ID-NEXT: fmin.s fa5, fa4, fa5 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 @@ -490,12 +490,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK64-IZFHMIN: # %bb.0: # %start ; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 ; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64-IZFHMIN-NEXT: neg a0, a0 ; CHECK64-IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; CHECK64-IZFHMIN-NEXT: lui a1, 815104 ; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a1 ; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64-IZFHMIN-NEXT: neg a0, a0 ; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 @@ -518,15 +518,15 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK64-IZHINXMIN-LABEL: fcvt_si_h_sat: ; CHECK64-IZHINXMIN: # %bb.0: # %start ; CHECK64-IZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK64-IZHINXMIN-NEXT: lui a1, 815104 +; CHECK64-IZHINXMIN-NEXT: feq.s a1, a0, a0 ; CHECK64-IZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0) ; CHECK64-IZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2) -; CHECK64-IZHINXMIN-NEXT: fmax.s a1, a0, a1 -; CHECK64-IZHINXMIN-NEXT: feq.s a0, a0, a0 -; CHECK64-IZHINXMIN-NEXT: neg a0, a0 -; CHECK64-IZHINXMIN-NEXT: fmin.s a1, a1, a2 -; CHECK64-IZHINXMIN-NEXT: fcvt.l.s a1, a1, rtz -; CHECK64-IZHINXMIN-NEXT: and a0, a0, a1 +; CHECK64-IZHINXMIN-NEXT: neg a1, a1 +; CHECK64-IZHINXMIN-NEXT: lui a3, 815104 +; CHECK64-IZHINXMIN-NEXT: fmax.s a0, a0, a3 +; CHECK64-IZHINXMIN-NEXT: fmin.s a0, a0, a2 +; CHECK64-IZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz +; CHECK64-IZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZHINXMIN-NEXT: ret ; ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_si_h_sat: @@ -546,15 +546,15 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_si_h_sat: ; CHECK64-IZDINXZHINXMIN: # %bb.0: # %start ; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: lui a1, 815104 +; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a1, a0, a0 ; 
CHECK64-IZDINXZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0) ; CHECK64-IZDINXZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2) -; CHECK64-IZDINXZHINXMIN-NEXT: fmax.s a1, a0, a1 -; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a0, a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: neg a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: fmin.s a1, a1, a2 -; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a1, a1, rtz -; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a0, a1 +; CHECK64-IZDINXZHINXMIN-NEXT: neg a1, a1 +; CHECK64-IZDINXZHINXMIN-NEXT: lui a3, 815104 +; CHECK64-IZDINXZHINXMIN-NEXT: fmax.s a0, a0, a3 +; CHECK64-IZDINXZHINXMIN-NEXT: fmin.s a0, a0, a2 +; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz +; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) @@ -6377,12 +6377,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IZFH: # %bb.0: # %start ; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 ; RV64IZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IZFH-NEXT: neg a0, a0 ; RV64IZFH-NEXT: lui a1, %hi(.LCPI32_0) ; RV64IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV64IZFH-NEXT: lui a1, 815104 ; RV64IZFH-NEXT: fmv.w.x fa3, a1 ; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IZFH-NEXT: neg a0, a0 ; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IZFH-NEXT: and a0, a0, a1 @@ -6407,12 +6407,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IDZFH: # %bb.0: # %start ; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 ; RV64IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IDZFH-NEXT: neg a0, a0 ; RV64IDZFH-NEXT: lui a1, %hi(.LCPI32_0) ; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV64IDZFH-NEXT: lui a1, 815104 ; RV64IDZFH-NEXT: fmv.w.x fa3, a1 ; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IDZFH-NEXT: neg a0, a0 ; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IDZFH-NEXT: and a0, a0, a1 @@ -6435,15 +6435,15 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IZHINX-LABEL: fcvt_w_s_sat_i16: ; RV64IZHINX: # %bb.0: # %start ; RV64IZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZHINX-NEXT: lui a1, 815104 +; RV64IZHINX-NEXT: feq.s a1, a0, a0 ; RV64IZHINX-NEXT: lui a2, %hi(.LCPI32_0) ; RV64IZHINX-NEXT: lw a2, %lo(.LCPI32_0)(a2) -; RV64IZHINX-NEXT: fmax.s a1, a0, a1 -; RV64IZHINX-NEXT: feq.s a0, a0, a0 -; RV64IZHINX-NEXT: neg a0, a0 -; RV64IZHINX-NEXT: fmin.s a1, a1, a2 -; RV64IZHINX-NEXT: fcvt.l.s a1, a1, rtz -; RV64IZHINX-NEXT: and a0, a0, a1 +; RV64IZHINX-NEXT: neg a1, a1 +; RV64IZHINX-NEXT: lui a3, 815104 +; RV64IZHINX-NEXT: fmax.s a0, a0, a3 +; RV64IZHINX-NEXT: fmin.s a0, a0, a2 +; RV64IZHINX-NEXT: fcvt.l.s a0, a0, rtz +; RV64IZHINX-NEXT: and a0, a1, a0 ; RV64IZHINX-NEXT: ret ; ; RV32IZDINXZHINX-LABEL: fcvt_w_s_sat_i16: @@ -6463,15 +6463,15 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IZDINXZHINX-LABEL: fcvt_w_s_sat_i16: ; RV64IZDINXZHINX: # %bb.0: # %start ; RV64IZDINXZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZDINXZHINX-NEXT: lui a1, 815104 +; RV64IZDINXZHINX-NEXT: feq.s a1, a0, a0 ; RV64IZDINXZHINX-NEXT: lui a2, %hi(.LCPI32_0) ; RV64IZDINXZHINX-NEXT: lw a2, %lo(.LCPI32_0)(a2) -; RV64IZDINXZHINX-NEXT: fmax.s a1, a0, a1 -; RV64IZDINXZHINX-NEXT: feq.s a0, a0, a0 -; RV64IZDINXZHINX-NEXT: neg a0, a0 -; RV64IZDINXZHINX-NEXT: fmin.s a1, a1, a2 -; RV64IZDINXZHINX-NEXT: fcvt.l.s a1, a1, rtz -; RV64IZDINXZHINX-NEXT: and a0, a0, a1 +; RV64IZDINXZHINX-NEXT: neg a1, a1 +; RV64IZDINXZHINX-NEXT: lui a3, 815104 +; RV64IZDINXZHINX-NEXT: fmax.s a0, a0, a3 +; RV64IZDINXZHINX-NEXT: fmin.s a0, a0, a2 +; RV64IZDINXZHINX-NEXT: fcvt.l.s a0, 
a0, rtz +; RV64IZDINXZHINX-NEXT: and a0, a1, a0 ; RV64IZDINXZHINX-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -6591,12 +6591,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64ID-LP64-NEXT: call __extendhfsf2 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 ; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5 +; RV64ID-LP64-NEXT: neg a0, a0 ; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI32_0) ; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV64ID-LP64-NEXT: lui a1, 815104 ; RV64ID-LP64-NEXT: fmv.w.x fa3, a1 ; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3 -; RV64ID-LP64-NEXT: neg a0, a0 ; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-LP64-NEXT: and a0, a0, a1 @@ -6629,12 +6629,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 ; RV64ID-NEXT: feq.s a0, fa0, fa0 +; RV64ID-NEXT: neg a0, a0 ; RV64ID-NEXT: lui a1, %hi(.LCPI32_0) ; RV64ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1) ; RV64ID-NEXT: lui a1, 815104 ; RV64ID-NEXT: fmv.w.x fa4, a1 ; RV64ID-NEXT: fmax.s fa4, fa0, fa4 -; RV64ID-NEXT: neg a0, a0 ; RV64ID-NEXT: fmin.s fa5, fa4, fa5 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 @@ -6661,12 +6661,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK64-IZFHMIN: # %bb.0: # %start ; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 ; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64-IZFHMIN-NEXT: neg a0, a0 ; CHECK64-IZFHMIN-NEXT: lui a1, %hi(.LCPI32_0) ; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; CHECK64-IZFHMIN-NEXT: lui a1, 815104 ; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a1 ; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64-IZFHMIN-NEXT: neg a0, a0 ; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 @@ -6689,15 +6689,15 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK64-IZHINXMIN-LABEL: fcvt_w_s_sat_i16: ; CHECK64-IZHINXMIN: # %bb.0: # %start ; CHECK64-IZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK64-IZHINXMIN-NEXT: lui a1, 815104 +; CHECK64-IZHINXMIN-NEXT: feq.s a1, a0, a0 ; CHECK64-IZHINXMIN-NEXT: lui a2, %hi(.LCPI32_0) ; CHECK64-IZHINXMIN-NEXT: lw a2, %lo(.LCPI32_0)(a2) -; CHECK64-IZHINXMIN-NEXT: fmax.s a1, a0, a1 -; CHECK64-IZHINXMIN-NEXT: feq.s a0, a0, a0 -; CHECK64-IZHINXMIN-NEXT: neg a0, a0 -; CHECK64-IZHINXMIN-NEXT: fmin.s a1, a1, a2 -; CHECK64-IZHINXMIN-NEXT: fcvt.l.s a1, a1, rtz -; CHECK64-IZHINXMIN-NEXT: and a0, a0, a1 +; CHECK64-IZHINXMIN-NEXT: neg a1, a1 +; CHECK64-IZHINXMIN-NEXT: lui a3, 815104 +; CHECK64-IZHINXMIN-NEXT: fmax.s a0, a0, a3 +; CHECK64-IZHINXMIN-NEXT: fmin.s a0, a0, a2 +; CHECK64-IZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz +; CHECK64-IZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZHINXMIN-NEXT: ret ; ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_w_s_sat_i16: @@ -6717,15 +6717,15 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_w_s_sat_i16: ; CHECK64-IZDINXZHINXMIN: # %bb.0: # %start ; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: lui a1, 815104 +; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a1, a0, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: lui a2, %hi(.LCPI32_0) ; CHECK64-IZDINXZHINXMIN-NEXT: lw a2, %lo(.LCPI32_0)(a2) -; CHECK64-IZDINXZHINXMIN-NEXT: fmax.s a1, a0, a1 -; CHECK64-IZDINXZHINXMIN-NEXT: feq.s a0, a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: neg a0, a0 -; CHECK64-IZDINXZHINXMIN-NEXT: fmin.s a1, a1, a2 -; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a1, a1, rtz -; 
CHECK64-IZDINXZHINXMIN-NEXT: and a0, a0, a1 +; CHECK64-IZDINXZHINXMIN-NEXT: neg a1, a1 +; CHECK64-IZDINXZHINXMIN-NEXT: lui a3, 815104 +; CHECK64-IZDINXZHINXMIN-NEXT: fmax.s a0, a0, a3 +; CHECK64-IZDINXZHINXMIN-NEXT: fmin.s a0, a0, a2 +; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.l.s a0, a0, rtz +; CHECK64-IZDINXZHINXMIN-NEXT: and a0, a1, a0 ; CHECK64-IZDINXZHINXMIN-NEXT: ret start: %0 = tail call i16 @llvm.fptosi.sat.i16.f16(half %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 59eb4b89a2f56e..b0f6bebea0381d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -125,35 +125,20 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { } define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { -; RV32-LABEL: vrgather_shuffle_vx_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vid.v v12 -; RV32-NEXT: li a0, 3 -; RV32-NEXT: lui a1, %hi(.LCPI8_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0) -; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vmul.vx v12, v12, a0 -; RV32-NEXT: vmv.v.i v0, 3 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV32-NEXT: vrgatherei16.vv v10, v8, v12, v0.t -; RV32-NEXT: vmv.v.v v8, v10 -; RV32-NEXT: ret -; -; RV64-LABEL: vrgather_shuffle_vx_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64-NEXT: vid.v v12 -; RV64-NEXT: lui a0, %hi(.LCPI8_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI8_0) -; RV64-NEXT: vlse64.v v10, (a0), zero -; RV64-NEXT: li a0, 3 -; RV64-NEXT: vmul.vx v12, v12, a0 -; RV64-NEXT: vmv.v.i v0, 3 -; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; RV64-NEXT: vrgatherei16.vv v10, v8, v12, v0.t -; RV64-NEXT: vmv.v.v v8, v10 -; RV64-NEXT: ret +; CHECK-LABEL: vrgather_shuffle_vx_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: li a0, 3 +; CHECK-NEXT: lui a1, %hi(.LCPI8_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI8_0) +; CHECK-NEXT: vlse64.v v10, (a1), zero +; CHECK-NEXT: vmul.vx v12, v12, a0 +; CHECK-NEXT: vmv.v.i v0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> ret <4 x double> %s } @@ -279,3 +264,6 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) { %s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> ret <8 x double> %s } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 9f0240c53b219a..1d3c22a02efc0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -774,9 +774,9 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV64VLS-NEXT: vl1re64.v v8, (a0) ; RV64VLS-NEXT: addi a0, sp, 128 ; RV64VLS-NEXT: vs1r.v v8, (a0) -; RV64VLS-NEXT: addi a0, sp, 192 -; RV64VLS-NEXT: vl8re64.v v8, (a0) ; RV64VLS-NEXT: addi a0, sp, 64 +; RV64VLS-NEXT: addi a2, sp, 192 +; RV64VLS-NEXT: vl8re64.v v8, (a2) ; RV64VLS-NEXT: vl8re64.v v16, (a0) ; RV64VLS-NEXT: addi a0, a1, 128 ; RV64VLS-NEXT: vs8r.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 6ba90b00fdba56..4598bf67a23637 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -1142,109 +1142,57 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 declare <128 x i1> @llvm.vp.fcmp.v128f16(<128 x half>, <128 x half>, metadata, <128 x i1>, i32) define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 x i1> %m, i32 zeroext %evl) { -; ZVFH32-LABEL: fcmp_oeq_vv_v128f16: -; ZVFH32: # %bb.0: -; ZVFH32-NEXT: addi sp, sp, -16 -; ZVFH32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH32-NEXT: csrr a1, vlenb -; ZVFH32-NEXT: slli a1, a1, 4 -; ZVFH32-NEXT: sub sp, sp, a1 -; ZVFH32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; ZVFH32-NEXT: addi a1, a0, 128 -; ZVFH32-NEXT: li a3, 64 -; ZVFH32-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; ZVFH32-NEXT: vle16.v v24, (a1) -; ZVFH32-NEXT: csrr a1, vlenb -; ZVFH32-NEXT: slli a1, a1, 3 -; ZVFH32-NEXT: add a1, sp, a1 -; ZVFH32-NEXT: addi a1, a1, 16 -; ZVFH32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFH32-NEXT: vle16.v v24, (a0) -; ZVFH32-NEXT: addi a0, sp, 16 -; ZVFH32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH32-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH32-NEXT: vslidedown.vi v7, v0, 8 -; ZVFH32-NEXT: mv a0, a2 -; ZVFH32-NEXT: bltu a2, a3, .LBB43_2 -; ZVFH32-NEXT: # %bb.1: -; ZVFH32-NEXT: li a0, 64 -; ZVFH32-NEXT: .LBB43_2: -; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH32-NEXT: addi a0, sp, 16 -; ZVFH32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH32-NEXT: vmfeq.vv v6, v8, v24, v0.t -; ZVFH32-NEXT: addi a0, a2, -64 -; ZVFH32-NEXT: sltu a1, a2, a0 -; ZVFH32-NEXT: addi a1, a1, -1 -; ZVFH32-NEXT: and a0, a1, a0 -; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH32-NEXT: vmv1r.v v0, v7 -; ZVFH32-NEXT: csrr a0, vlenb -; ZVFH32-NEXT: slli a0, a0, 3 -; ZVFH32-NEXT: add a0, sp, a0 -; ZVFH32-NEXT: addi a0, a0, 16 -; ZVFH32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH32-NEXT: vmfeq.vv v24, v16, v8, v0.t -; ZVFH32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH32-NEXT: vslideup.vi v6, v24, 8 -; ZVFH32-NEXT: vmv.v.v v0, v6 -; ZVFH32-NEXT: csrr a0, vlenb -; ZVFH32-NEXT: slli a0, a0, 4 -; ZVFH32-NEXT: add sp, sp, a0 -; ZVFH32-NEXT: addi sp, sp, 16 -; ZVFH32-NEXT: ret -; -; ZVFH64-LABEL: fcmp_oeq_vv_v128f16: -; ZVFH64: # %bb.0: -; ZVFH64-NEXT: addi sp, sp, -16 -; ZVFH64-NEXT: .cfi_def_cfa_offset 16 -; ZVFH64-NEXT: csrr a1, vlenb 
-; ZVFH64-NEXT: slli a1, a1, 4 -; ZVFH64-NEXT: sub sp, sp, a1 -; ZVFH64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; ZVFH64-NEXT: addi a1, a0, 128 -; ZVFH64-NEXT: li a3, 64 -; ZVFH64-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; ZVFH64-NEXT: vle16.v v24, (a1) -; ZVFH64-NEXT: csrr a1, vlenb -; ZVFH64-NEXT: slli a1, a1, 3 -; ZVFH64-NEXT: add a1, sp, a1 -; ZVFH64-NEXT: addi a1, a1, 16 -; ZVFH64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFH64-NEXT: vle16.v v24, (a0) -; ZVFH64-NEXT: addi a0, sp, 16 -; ZVFH64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH64-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH64-NEXT: mv a0, a2 -; ZVFH64-NEXT: vslidedown.vi v7, v0, 8 -; ZVFH64-NEXT: bltu a2, a3, .LBB43_2 -; ZVFH64-NEXT: # %bb.1: -; ZVFH64-NEXT: li a0, 64 -; ZVFH64-NEXT: .LBB43_2: -; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH64-NEXT: addi a0, sp, 16 -; ZVFH64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: vmfeq.vv v6, v8, v24, v0.t -; ZVFH64-NEXT: addi a0, a2, -64 -; ZVFH64-NEXT: sltu a1, a2, a0 -; ZVFH64-NEXT: addi a1, a1, -1 -; ZVFH64-NEXT: and a0, a1, a0 -; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH64-NEXT: vmv1r.v v0, v7 -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 3 -; ZVFH64-NEXT: add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: vmfeq.vv v24, v16, v8, v0.t -; ZVFH64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH64-NEXT: vslideup.vi v6, v24, 8 -; ZVFH64-NEXT: vmv.v.v v0, v6 -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 4 -; ZVFH64-NEXT: add sp, sp, a0 -; ZVFH64-NEXT: addi sp, sp, 16 -; ZVFH64-NEXT: ret +; ZVFH-LABEL: fcmp_oeq_vv_v128f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 4 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFH-NEXT: addi a1, a0, 128 +; ZVFH-NEXT: li a3, 64 +; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma +; ZVFH-NEXT: vle16.v v24, (a1) +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: add a1, sp, a1 +; ZVFH-NEXT: addi a1, a1, 16 +; ZVFH-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFH-NEXT: vle16.v v24, (a0) +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v7, v0, 8 +; ZVFH-NEXT: mv a0, a2 +; ZVFH-NEXT: bltu a2, a3, .LBB43_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: li a0, 64 +; ZVFH-NEXT: .LBB43_2: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v6, v8, v24, v0.t +; ZVFH-NEXT: addi a0, a2, -64 +; ZVFH-NEXT: sltu a1, a2, a0 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: and a0, a1, a0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add a0, sp, a0 +; ZVFH-NEXT: addi a0, a0, 16 +; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v24, v16, v8, v0.t +; ZVFH-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVFH-NEXT: vslideup.vi v6, v24, 8 +; ZVFH-NEXT: vmv.v.v v0, v6 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 4 +; ZVFH-NEXT: 
add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret ; ; ZVFHMIN32-LABEL: fcmp_oeq_vv_v128f16: ; ZVFHMIN32: # %bb.0: @@ -2956,3 +2904,6 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x %v = call <32 x i1> @llvm.vp.fcmp.v32f64(<32 x double> %va, <32 x double> %vb, metadata !"oeq", <32 x i1> %m, i32 %evl) ret <32 x i1> %v } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ZVFH32: {{.*}} +; ZVFH64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 981715bd2b998b..21bbca00921d6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -1243,109 +1243,57 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext declare <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32>, <64 x i32>, metadata, <64 x i1>, i32) define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: icmp_eq_vv_v64i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 4 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bltu a2, a3, .LBB99_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 32 -; RV32-NEXT: .LBB99_2: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v6, v8, v24, v0.t -; RV32-NEXT: addi a0, a2, -32 -; RV32-NEXT: sltu a1, a2, a0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v24, v16, v8, v0.t -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v6, v24, 4 -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: icmp_eq_vv_v64i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: li a3, 32 -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vle32.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; 
RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vle32.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: vslidedown.vi v7, v0, 4 -; RV64-NEXT: bltu a2, a3, .LBB99_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 32 -; RV64-NEXT: .LBB99_2: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vv v6, v8, v24, v0.t -; RV64-NEXT: addi a0, a2, -32 -; RV64-NEXT: sltu a1, a2, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vv v24, v16, v8, v0.t -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v6, v24, 4 -; RV64-NEXT: vmv1r.v v0, v6 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: icmp_eq_vv_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 4 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a3, .LBB99_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: .LBB99_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v6, v8, v24, v0.t +; CHECK-NEXT: addi a0, a2, -32 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v24, v16, v8, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v6, v24, 4 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> %va, <64 x i32> %vb, metadata !"eq", <64 x i1> %m, i32 %evl) ret <64 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 38e4aab4deb34a..351fc500145eac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2219,8 +2219,8 @@ define void 
@vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsext.vf4 v16, v24 +; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB100_2 @@ -2286,29 +2286,31 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; RV64-NEXT: vl4re16.v v24, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v0, v24 ; RV64-NEXT: vsext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vsext.vf4 v16, v24 -; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: vsll.vi v24, v0, 3 ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB101_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB101_2: ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t ; RV64-NEXT: sub a3, a2, a1 ; RV64-NEXT: sltu a2, a2, a3 @@ -2319,15 +2321,13 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret
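The MachineScheduler.cpp hunk above is the substantive change: the pressure-tracking heuristic now keys off the largest legal integer type, because the scan starts at MVT::i64 and breaks at the first legal type it finds instead of letting narrower legal types recompute the decision. The following is a minimal standalone C++ sketch of that heuristic only; shouldTrackPressure, SimpleIntVT, and the IsTypeLegal/NumAllocatableRegs callbacks are illustrative stand-ins for the TargetLowering (TLI) and RegisterClassInfo queries in the real code, not LLVM APIs.

#include <cstdio>
#include <functional>

// Stand-in for the MVT integer types the loop walks; i1 is the excluded lower bound.
enum SimpleIntVT { i1 = 1, i8, i16, i32, i64 };

// Walk from the widest integer type down, stop at the first legal one, and
// enable pressure tracking only when the region has more schedulable
// instructions than half of that type's allocatable registers.
static bool shouldTrackPressure(
    unsigned NumRegionInstrs,
    const std::function<bool(SimpleIntVT)> &IsTypeLegal,
    const std::function<unsigned(SimpleIntVT)> &NumAllocatableRegs) {
  bool Track = true; // default when no legal integer type is found
  for (unsigned VT = i64; VT > (unsigned)i1; --VT) {
    SimpleIntVT LegalIntVT = (SimpleIntVT)VT;
    if (IsTypeLegal(LegalIntVT)) {
      unsigned NIntRegs = NumAllocatableRegs(LegalIntVT);
      Track = NumRegionInstrs > (NIntRegs / 2);
      break; // without the break, a narrower legal type would overwrite this decision
    }
  }
  return Track;
}

int main() {
  // Hypothetical 64-bit target with 28 allocatable integer registers: track
  // pressure only for regions with more than 14 schedulable instructions.
  auto IsLegal = [](SimpleIntVT VT) { return VT == i64 || VT == i32; };
  auto NumRegs = [](SimpleIntVT) { return 28u; };
  std::printf("10 instrs: %d\n", shouldTrackPressure(10, IsLegal, NumRegs)); // prints 0
  std::printf("20 instrs: %d\n", shouldTrackPressure(20, IsLegal, NumRegs)); // prints 1
  return 0;
}

The RISC-V and LoongArch check-line updates elsewhere in the diff are regenerated test output reflecting the scheduling changes that follow from the revised heuristic.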