From b0eb114830caea7f9b8596dc5a9fbccb0ff59129 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 27 Sep 2024 11:39:01 -0700 Subject: [PATCH 1/2] [RISCV] Take known minimum vlen into account when calculating alignment padding in assignRVVStackObjectOffsets. If we know vlen is a multiple of 16, we don't need any alignment padding. I wrote the code so that it would generate the minimum amount of padding if the stack align was 32 or larger or if RVVBitsPerBlock was smaller than half the stack alignment. --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 11 +- .../early-clobber-tied-def-subreg-liveness.ll | 10 +- llvm/test/CodeGen/RISCV/rvv-cfi-info.ll | 53 ++-- .../RISCV/rvv/access-fixed-objects-by-rvv.ll | 4 +- .../RISCV/rvv/addi-scalable-offset.mir | 2 - .../rvv/alloca-load-store-scalable-array.ll | 8 +- .../CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll | 255 ++++++++++++------ .../CodeGen/RISCV/rvv/callee-saved-regs.ll | 2 - .../test/CodeGen/RISCV/rvv/emergency-slot.mir | 2 +- .../rvv/fixed-vectors-fp-buildvec-bf16.ll | 8 +- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 8 +- .../rvv/fixed-vectors-interleaved-access.ll | 10 +- .../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 16 +- .../RISCV/rvv/fixed-vectors-vfwmaccbf16.ll | 16 +- .../RISCV/rvv/fixed-vectors-vpscatter.ll | 20 +- llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 26 +- llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 26 +- .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 96 +++---- llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 20 -- .../CodeGen/RISCV/rvv/no-reserved-frame.ll | 1 - .../CodeGen/RISCV/rvv/rv32-spill-vector.ll | 8 - .../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 4 - .../CodeGen/RISCV/rvv/rv64-spill-vector.ll | 4 - .../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 4 - .../CodeGen/RISCV/rvv/scalar-stack-align.ll | 105 +++++--- llvm/test/CodeGen/RISCV/rvv/stack-folding.ll | 32 +-- llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 28 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 28 +- llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 70 ++--- llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 14 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 28 +- .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 10 +- llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll | 4 - .../rvv/wrong-stack-offset-for-rvv-object.mir | 29 +- 34 files changed, 473 insertions(+), 489 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 22824b77c37dd..b0c525ea8c299 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1102,16 +1102,25 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { RVVStackAlign = std::max(RVVStackAlign, ObjectAlign); } + uint64_t StackSize = Offset; + + // Multiply by vscale. + if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock) + StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock; + // Ensure the alignment of the RVV stack. Since we want the most-aligned // object right at the bottom (i.e., any padding at the top of the frame), // readjust all RVV objects down by the alignment padding. - uint64_t StackSize = Offset; if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) { StackSize += AlignmentPadding; for (int FI : ObjectsToAllocate) MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding); } + // Remove vscale. 
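  // Editorial note, not part of the patch: a minimal worked example of the
  // scale -> align -> descale computation added here, assuming RVVStackAlign
  // is 16 (which matches the old lmul1 codegen below). A single
  // `alloca <vscale x 1 x i64>` gives StackSize = 8 bytes per vscale unit:
  //  * +v (RealMinVLen = 128): factor = 128 / RVVBitsPerBlock = 2, so the
  //    scaled size 8 * 2 = 16 is already 16-byte aligned, no padding is
  //    added, and descaling gives 8 back, i.e. `csrr a0, vlenb` followed by
  //    a plain `sub sp, sp, a0` with no shift.
  //  * +zve64x (RealMinVLen = 64): factor = 1, so 8 is still padded up to 16
  //    and the old `slli a0, a0, 1` sequence is kept, as in
  //    scalar-stack-align.ll.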
+ if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock) + StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock; + return std::make_pair(StackSize, RVVStackAlign); } diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 899aad6ed7232..0c2b809c0be20 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -17,10 +17,10 @@ define void @_Z3foov() { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma @@ -83,8 +83,8 @@ define void @_Z3foov() { ; CHECK-NEXT: addi a0, a0, %lo(var_47) ; CHECK-NEXT: vsseg4e16.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll index 93fe66695b70e..225680e846bac 100644 --- a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll +++ b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll @@ -10,9 +10,10 @@ define riscv_vector_cc @test_vector_callee_cfi( @test_vector_callee_cfi( @test_vector_callee_cfi( @test_vector_callee_cfi( @access_fixed_and_vector_objects(ptr %val) { ; RV64IV-NEXT: addi sp, sp, -528 ; RV64IV-NEXT: .cfi_def_cfa_offset 528 ; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb +; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb ; RV64IV-NEXT: addi a0, sp, 8 ; RV64IV-NEXT: vl1re64.v v8, (a0) ; RV64IV-NEXT: addi a0, sp, 528 @@ -44,7 +43,6 @@ define @access_fixed_and_vector_objects(ptr %val) { ; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV64IV-NEXT: vadd.vv v8, v8, v9 ; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 ; RV64IV-NEXT: addi sp, sp, 528 ; RV64IV-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index f976adcfe931c..5f0e1a9b9aa24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -38,12 +38,10 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240 ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = 
PseudoVLE64_V_M1 undef renamable $v8, killed renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) ; CHECK-NEXT: $x10 = PseudoReadVLENB - ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = SUB $x8, killed $x10 ; CHECK-NEXT: $x10 = ADDI killed $x10, -2048 ; CHECK-NEXT: $x10 = ADDI killed $x10, -224 diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll index 1fe91c721f4dd..2e70c3395090e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll @@ -11,9 +11,10 @@ define void @test(ptr %addr) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrrs a1, vlenb, zero -; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 3 * vlenb ; CHECK-NEXT: csrrs a1, vlenb, zero ; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: vl1re64.v v8, (a2) @@ -28,7 +29,8 @@ define void @test(ptr %addr) { ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 90794820ddd84..35e269b911902 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -7,34 +7,13 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL define void @lmul1() nounwind { -; NOZBA-LABEL: lmul1: -; NOZBA: # %bb.0: -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 1 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 1 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul1: -; ZBA: # %bb.0: -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: slli a0, a0, 1 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add sp, a0, sp -; ZBA-NEXT: ret -; -; NOMUL-LABEL: lmul1: -; NOMUL: # %bb.0: -; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 1 -; NOMUL-NEXT: sub sp, sp, a0 -; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 1 -; NOMUL-NEXT: add sp, sp, a0 -; NOMUL-NEXT: ret +; CHECK-LABEL: lmul1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %v = alloca ret void } @@ -192,29 +171,34 @@ define void @lmul2_and_1() nounwind { ; NOZBA-LABEL: lmul2_and_1: ; NOZBA: # %bb.0: ; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 ; NOZBA-NEXT: sub sp, sp, a0 ; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 ; NOZBA-NEXT: add sp, sp, a0 ; NOZBA-NEXT: ret ; ; ZBA-LABEL: lmul2_and_1: ; ZBA: # %bb.0: ; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: slli a0, a0, 2 +; ZBA-NEXT: sh1add a0, a0, a0 ; ZBA-NEXT: sub sp, sp, a0 ; ZBA-NEXT: csrr a0, vlenb -; 
ZBA-NEXT: sh2add sp, a0, sp +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: add sp, sp, a0 ; ZBA-NEXT: ret ; ; NOMUL-LABEL: lmul2_and_1: ; NOMUL: # %bb.0: ; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: slli a1, a0, 1 +; NOMUL-NEXT: add a0, a1, a0 ; NOMUL-NEXT: sub sp, sp, a0 ; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: slli a1, a0, 1 +; NOMUL-NEXT: add a0, a1, a0 ; NOMUL-NEXT: add sp, sp, a0 ; NOMUL-NEXT: ret %v1 = alloca @@ -223,63 +207,176 @@ define void @lmul2_and_1() nounwind { } define void @lmul4_and_1() nounwind { -; CHECK-LABEL: lmul4_and_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 48 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -48 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul4_and_1: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -48 +; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOZBA-NEXT: addi s0, sp, 48 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 6 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -32 +; NOZBA-NEXT: addi sp, s0, -48 +; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 48 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul4_and_1: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -48 +; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 48 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 1 +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -32 +; ZBA-NEXT: addi sp, s0, -48 +; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 48 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul4_and_1: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -48 +; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 48 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -32 +; NOMUL-NEXT: addi sp, s0, -48 +; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 48 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul4_and_2() nounwind { -; CHECK-LABEL: lmul4_and_2: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 48 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -48 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul4_and_2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -48 +; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded 
Spill +; NOZBA-NEXT: addi s0, sp, 48 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 6 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -32 +; NOZBA-NEXT: addi sp, s0, -48 +; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 48 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul4_and_2: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -48 +; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 48 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 1 +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -32 +; ZBA-NEXT: addi sp, s0, -48 +; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 48 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul4_and_2: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -48 +; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 48 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -32 +; NOMUL-NEXT: addi sp, s0, -48 +; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 48 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul4_and_2_x2_0() nounwind { -; CHECK-LABEL: lmul4_and_2_x2_0: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 48 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -48 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul4_and_2_x2_0: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -48 +; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOZBA-NEXT: addi s0, sp, 48 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 14 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -32 +; NOZBA-NEXT: addi sp, s0, -48 +; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 48 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul4_and_2_x2_0: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -48 +; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 48 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: li a1, 14 +; ZBA-NEXT: mul a0, a0, a1 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -32 +; ZBA-NEXT: addi sp, s0, -48 +; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 48 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul4_and_2_x2_0: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -48 +; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 48 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a1, a1, a0 +; 
NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -32 +; NOMUL-NEXT: addi sp, s0, -48 +; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 48 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca %v3 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll index 2177bbfe5b2a4..c1ce2e988fc51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll @@ -7,7 +7,6 @@ define @test_vector_std( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -15,7 +14,6 @@ define @test_vector_std( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index 600084632ce68..c4bc794b8aeb3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -83,7 +83,7 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -272 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x11 = frame-setup ADDI $x0, 52 + ; CHECK-NEXT: $x11 = frame-setup ADDI $x0, 51 ; CHECK-NEXT: $x10 = frame-setup MUL killed $x10, killed $x11 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll index 727e03125176a..bdedc5f33c3a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll @@ -26,9 +26,8 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32-ZFBFMIN-NEXT: .cfi_offset ra, -4 ; RV32-ZFBFMIN-NEXT: csrr a1, vlenb -; RV32-ZFBFMIN-NEXT: slli a1, a1, 1 ; RV32-ZFBFMIN-NEXT: sub sp, sp, a1 -; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV32-ZFBFMIN-NEXT: addi a1, sp, 32 ; RV32-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-ZFBFMIN-NEXT: andi a0, a0, 3 @@ -43,7 +42,6 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFBFMIN-NEXT: csrr a0, vlenb -; RV32-ZFBFMIN-NEXT: slli a0, a0, 1 ; RV32-ZFBFMIN-NEXT: add sp, sp, a0 ; RV32-ZFBFMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-ZFBFMIN-NEXT: addi sp, sp, 48 @@ -56,9 +54,8 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-ZFBFMIN-NEXT: .cfi_offset ra, -8 ; RV64-ZFBFMIN-NEXT: csrr a1, vlenb -; RV64-ZFBFMIN-NEXT: slli 
a1, a1, 1 ; RV64-ZFBFMIN-NEXT: sub sp, sp, a1 -; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV64-ZFBFMIN-NEXT: addi a1, sp, 32 ; RV64-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-ZFBFMIN-NEXT: andi a0, a0, 3 @@ -73,7 +70,6 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFBFMIN-NEXT: csrr a0, vlenb -; RV64-ZFBFMIN-NEXT: slli a0, a0, 1 ; RV64-ZFBFMIN-NEXT: add sp, sp, a0 ; RV64-ZFBFMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-ZFBFMIN-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index bf2eb3ff0261a..53059a4f28d42 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -206,9 +206,8 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32-ZFHMIN-NEXT: .cfi_offset ra, -4 ; RV32-ZFHMIN-NEXT: csrr a1, vlenb -; RV32-ZFHMIN-NEXT: slli a1, a1, 1 ; RV32-ZFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV32-ZFHMIN-NEXT: addi a1, sp, 32 ; RV32-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-ZFHMIN-NEXT: andi a0, a0, 3 @@ -223,7 +222,6 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: lh a0, 0(a0) ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFHMIN-NEXT: csrr a0, vlenb -; RV32-ZFHMIN-NEXT: slli a0, a0, 1 ; RV32-ZFHMIN-NEXT: add sp, sp, a0 ; RV32-ZFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-ZFHMIN-NEXT: addi sp, sp, 48 @@ -236,9 +234,8 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-ZFHMIN-NEXT: .cfi_offset ra, -8 ; RV64-ZFHMIN-NEXT: csrr a1, vlenb -; RV64-ZFHMIN-NEXT: slli a1, a1, 1 ; RV64-ZFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV64-ZFHMIN-NEXT: addi a1, sp, 32 ; RV64-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-ZFHMIN-NEXT: andi a0, a0, 3 @@ -253,7 +250,6 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: lh a0, 0(a0) ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFHMIN-NEXT: csrr a0, vlenb -; RV64-ZFHMIN-NEXT: slli a0, a0, 1 ; RV64-ZFHMIN-NEXT: add sp, sp, a0 ; RV64-ZFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-ZFHMIN-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index eff56e408d6d5..5911e8248f299 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -641,10 +641,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 66 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 6 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 66 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc1, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 65 * vlenb ; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v16, (a2) @@ -1065,8 +1065,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 66 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 6 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index d52cbb54c4b2d..805b371f1e3d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -104,9 +104,10 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 @@ -167,7 +168,8 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -210,9 +212,10 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 @@ -273,7 +276,8 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 diff 
--git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll index 62a479bdedf64..b953cf1f5bed8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll @@ -33,9 +33,8 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN32-NEXT: .cfi_offset s0, -8 ; ZVFBMIN32-NEXT: .cfi_offset fs0, -16 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: sub sp, sp, a0 -; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb ; ZVFBMIN32-NEXT: fmv.s fs0, fa0 ; ZVFBMIN32-NEXT: addi a0, sp, 16 ; ZVFBMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -58,7 +57,6 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN32-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: add sp, sp, a0 ; ZVFBMIN32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; ZVFBMIN32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -77,9 +75,8 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN64-NEXT: .cfi_offset s0, -16 ; ZVFBMIN64-NEXT: .cfi_offset fs0, -24 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: sub sp, sp, a0 -; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; ZVFBMIN64-NEXT: fmv.s fs0, fa0 ; ZVFBMIN64-NEXT: addi a0, sp, 32 ; ZVFBMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -104,7 +101,6 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN64-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: add sp, sp, a0 ; ZVFBMIN64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; ZVFBMIN64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -144,9 +140,8 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN32-NEXT: .cfi_offset ra, -4 ; ZVFBMIN32-NEXT: .cfi_offset fs0, -16 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: sub sp, sp, a0 -; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; ZVFBMIN32-NEXT: fmv.s fs0, fa0 ; ZVFBMIN32-NEXT: addi a0, sp, 32 ; ZVFBMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -167,7 +162,6 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN32-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: add sp, sp, a0 ; ZVFBMIN32-NEXT: lw ra, 44(sp) # 4-byte Folded 
Reload ; ZVFBMIN32-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload @@ -183,9 +177,8 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN64-NEXT: .cfi_offset ra, -8 ; ZVFBMIN64-NEXT: .cfi_offset fs0, -16 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: sub sp, sp, a0 -; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; ZVFBMIN64-NEXT: fmv.s fs0, fa0 ; ZVFBMIN64-NEXT: addi a0, sp, 32 ; ZVFBMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -208,7 +201,6 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN64-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: add sp, sp, a0 ; ZVFBMIN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; ZVFBMIN64-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index 0c180cd148b81..c055039876191 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1844,10 +1844,10 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -1888,8 +1888,8 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1931,10 +1931,10 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -1975,8 +1975,8 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: 
mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index 320db35770cb8..dd01e1c1ee66d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -294,10 +294,10 @@ define @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -482,7 +483,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -593,9 +595,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -643,7 +646,8 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -764,9 +768,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -815,7 
+820,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2250,9 +2256,8 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2319,7 +2324,6 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2380,9 +2384,8 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2404,7 +2407,6 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2489,9 +2491,8 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2540,7 +2541,6 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2648,9 +2648,8 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset 
s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2717,7 +2716,6 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2778,9 +2776,8 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2802,7 +2799,6 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2887,9 +2883,8 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2938,7 +2933,6 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -3767,9 +3761,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; 
CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -3817,7 +3812,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3926,9 +3922,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -3976,7 +3973,8 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -4096,9 +4094,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -4147,7 +4146,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -5568,9 +5568,8 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5640,7 +5639,6 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; 
CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -5699,9 +5697,8 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5726,7 +5723,6 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -5799,9 +5795,8 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5839,7 +5834,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -5948,9 +5942,8 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -6020,7 +6013,6 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -6079,9 +6071,8 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -6106,7 +6097,6 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -6179,9 +6169,8 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -6219,7 +6208,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll index 0e102d98c79cf..ccfe94ecad286 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll @@ -87,7 +87,6 @@ define @just_call( %0) nounwind { ; CHECK-NEXT: addi sp, sp, -48 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: addi a0, sp, 32 ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -95,7 +94,6 @@ define @just_call( %0) nounwind { ; CHECK-NEXT: addi a0, sp, 32 ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 48 @@ -106,7 +104,6 @@ define @just_call( %0) nounwind { ; UNOPT-NEXT: addi sp, sp, -48 ; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; UNOPT-NEXT: csrr a0, vlenb -; UNOPT-NEXT: slli a0, a0, 1 ; UNOPT-NEXT: sub sp, sp, a0 ; UNOPT-NEXT: addi a0, sp, 32 ; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -114,7 +111,6 @@ define @just_call( %0) nounwind { ; UNOPT-NEXT: addi a0, sp, 32 ; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; UNOPT-NEXT: csrr a0, vlenb -; UNOPT-NEXT: slli a0, a0, 1 ; UNOPT-NEXT: add sp, sp, a0 ; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; UNOPT-NEXT: addi sp, sp, 48 @@ -130,7 +126,6 @@ define @before_call1( %0, @before_call1( %0, @before_call1( %0, @before_call1( %0, @before_call2( %0, @before_call2( %0, @before_call2( %0, @before_call2( %0, @after_call1( %0, @after_call1( %0, @after_call1( %0, @after_call1( %0, @after_call2( %0, @after_call2( %0, @after_call2( %0, @after_call2( %0, @spill_lmul_mf2( %va) nounwind { ; SPILL-O0: 
# %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -18,7 +17,6 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,7 +25,6 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -35,7 +32,6 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -51,7 +47,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -60,7 +55,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -69,7 +63,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -77,7 +70,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll index c12df627b49d6..b34952b64f09e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -11,7 +11,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma @@ -24,7 +23,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -88,7 +86,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; 
SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma @@ -101,7 +98,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll index 30075c2dad516..1e6ff0baddaef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -9,7 +9,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -18,7 +17,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,7 +25,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -35,7 +32,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll index afb4b1560728c..361adb55ef12f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -11,7 +11,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma @@ -24,7 +23,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -88,7 +86,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma @@ -101,7 +98,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll 
b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll index 409ef50aa53c8..38cc4dd9e40a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll +++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll @@ -1,49 +1,79 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefixes=RV32,RV32-ZVE64 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: | FileCheck %s --check-prefixes=RV64,RV64-ZVE64 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefixes=RV32,RV32-V ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: | FileCheck %s --check-prefixes=RV64,RV64-V ; FIXME: We are over-aligning the stack on V, wasting stack space. define ptr @scalar_stack_align16() nounwind { -; RV32-LABEL: scalar_stack_align16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: call extern -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: add sp, sp, a1 -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret +; RV32-ZVE64-LABEL: scalar_stack_align16: +; RV32-ZVE64: # %bb.0: +; RV32-ZVE64-NEXT: addi sp, sp, -48 +; RV32-ZVE64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-ZVE64-NEXT: csrr a0, vlenb +; RV32-ZVE64-NEXT: slli a0, a0, 1 +; RV32-ZVE64-NEXT: sub sp, sp, a0 +; RV32-ZVE64-NEXT: addi a0, sp, 32 +; RV32-ZVE64-NEXT: call extern +; RV32-ZVE64-NEXT: addi a0, sp, 16 +; RV32-ZVE64-NEXT: csrr a1, vlenb +; RV32-ZVE64-NEXT: slli a1, a1, 1 +; RV32-ZVE64-NEXT: add sp, sp, a1 +; RV32-ZVE64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-ZVE64-NEXT: addi sp, sp, 48 +; RV32-ZVE64-NEXT: ret ; -; RV64-LABEL: scalar_stack_align16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -48 -; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: call extern -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add sp, sp, a1 -; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 48 -; RV64-NEXT: ret +; RV64-ZVE64-LABEL: scalar_stack_align16: +; RV64-ZVE64: # %bb.0: +; RV64-ZVE64-NEXT: addi sp, sp, -48 +; RV64-ZVE64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64-ZVE64-NEXT: csrr a0, vlenb +; RV64-ZVE64-NEXT: slli a0, a0, 1 +; RV64-ZVE64-NEXT: sub sp, sp, a0 +; RV64-ZVE64-NEXT: addi a0, sp, 32 +; RV64-ZVE64-NEXT: call extern +; RV64-ZVE64-NEXT: addi a0, sp, 16 +; RV64-ZVE64-NEXT: csrr a1, vlenb +; RV64-ZVE64-NEXT: slli a1, a1, 1 +; RV64-ZVE64-NEXT: add sp, sp, a1 +; RV64-ZVE64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64-ZVE64-NEXT: addi sp, sp, 48 +; RV64-ZVE64-NEXT: ret +; +; RV32-V-LABEL: scalar_stack_align16: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -48 +; RV32-V-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-V-NEXT: csrr a0, vlenb +; RV32-V-NEXT: sub sp, sp, a0 +; RV32-V-NEXT: addi a0, sp, 32 +; RV32-V-NEXT: call extern +; RV32-V-NEXT: addi a0, sp, 
16 +; RV32-V-NEXT: csrr a1, vlenb +; RV32-V-NEXT: add sp, sp, a1 +; RV32-V-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-V-NEXT: addi sp, sp, 48 +; RV32-V-NEXT: ret +; +; RV64-V-LABEL: scalar_stack_align16: +; RV64-V: # %bb.0: +; RV64-V-NEXT: addi sp, sp, -48 +; RV64-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64-V-NEXT: csrr a0, vlenb +; RV64-V-NEXT: sub sp, sp, a0 +; RV64-V-NEXT: addi a0, sp, 32 +; RV64-V-NEXT: call extern +; RV64-V-NEXT: addi a0, sp, 16 +; RV64-V-NEXT: csrr a1, vlenb +; RV64-V-NEXT: add sp, sp, a1 +; RV64-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64-V-NEXT: addi sp, sp, 48 +; RV64-V-NEXT: ret %a = alloca %c = alloca i64, align 16 call void @extern(ptr %a) @@ -51,3 +81,6 @@ define ptr @scalar_stack_align16() nounwind { } declare void @extern(ptr) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll index f966835622a9f..ffe6ff8a91abd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -8,9 +8,8 @@ define i64 @i64( %v, i1 %c) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: andi a0, a0, 1 @@ -29,7 +28,6 @@ define i64 @i64( %v, i1 %c) { ; RV32-NEXT: li a1, 0 ; RV32-NEXT: .LBB0_3: # %falsebb ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 ; RV32-NEXT: add sp, sp, a2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -39,9 +37,8 @@ define i64 @i64( %v, i1 %c) { ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: andi a0, a0, 1 @@ -52,7 +49,6 @@ define i64 @i64( %v, i1 %c) { ; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64-NEXT: .LBB0_2: # %falsebb ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add sp, sp, a1 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -71,9 +67,8 @@ define i32 @i32( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -84,7 +79,6 @@ define i32 @i32( %v, i1 %c) { ; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: 
.LBB1_2: # %falsebb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -103,9 +97,8 @@ define i16 @i16( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -116,7 +109,6 @@ define i16 @i16( %v, i1 %c) { ; CHECK-NEXT: lh a0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: .LBB2_2: # %falsebb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -135,9 +127,8 @@ define i8 @i8( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -148,7 +139,6 @@ define i8 @i8( %v, i1 %c) { ; CHECK-NEXT: lb a0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: .LBB3_2: # %falsebb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -167,9 +157,8 @@ define double @f64( %v, i1 %c) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: andi a0, a0, 1 @@ -183,7 +172,6 @@ define double @f64( %v, i1 %c) { ; RV32-NEXT: fcvt.d.w fa0, zero ; RV32-NEXT: .LBB4_3: # %falsebb ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -193,9 +181,8 @@ define double @f64( %v, i1 %c) { ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: andi a0, a0, 1 @@ -209,7 +196,6 @@ define double @f64( %v, i1 %c) { ; RV64-NEXT: fmv.d.x fa0, zero ; RV64-NEXT: .LBB4_3: # %falsebb ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 
; RV64-NEXT: ret @@ -228,9 +214,8 @@ define float @f32( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -244,7 +229,6 @@ define float @f32( %v, i1 %c) { ; CHECK-NEXT: fmv.w.x fa0, zero ; CHECK-NEXT: .LBB5_3: # %falsebb ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index b3de904d20622..4c298ab2b5e6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -508,12 +508,10 @@ define @vfadd_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -577,10 +575,8 @@ define @vfadd_vf_nxv32bf16( %va, bf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1301,12 +1297,10 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -1370,10 +1364,8 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index aa39fe5b5ec85..0fe6c5dec4264 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -470,12 +470,10 @@ define @vfdiv_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -539,10 +537,8 @@ define @vfdiv_vf_nxv32bf16( %va, bf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1213,12 +1209,10 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -1282,10 +1276,8 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index baecb7bb7d248..f0c74d064016a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -642,14 +642,13 @@ define @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a2, a2, a1 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * 
vlenb ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a1 @@ -1038,9 +1035,8 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 @@ -1059,14 +1055,13 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a2, a2, a1 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a1 @@ -1200,9 +1195,8 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 @@ -2247,14 +2241,13 @@ define @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a2, a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 @@ -2656,9 +2647,8 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a1, a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add a0, a0, a1 @@ -2683,14 +2673,13 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a2, a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 41 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 @@ -2824,9 +2813,8 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a1, a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add a0, a0, a1 @@ -8663,14 +8651,13 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -673,10 +671,8 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 449130e59876f..dd57b65b50f4f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -470,12 +470,10 @@ define @vfsub_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -539,10 +537,8 @@ define @vfsub_vf_nxv32bf16( %va, bf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1213,12 +1209,10 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 
-; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -1282,10 +1276,8 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 4d715c7031000..0028f3035c273 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2469,10 +2469,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; RV64-NEXT: vl4re16.v v24, (a1) ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill @@ -2509,8 +2509,8 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll index a869b433a4952..72f25268109a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll @@ -75,7 +75,6 @@ define @test3( %0, %1, @test3( %0, %1, @test3( %0, %1, @test3( %0, %1, Date: Sun, 29 Sep 2024 23:21:41 -0700 Subject: [PATCH 2/2] fixup! remove FIXME. --- llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll index 38cc4dd9e40a7..fcb5f07664aa5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll +++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll @@ -8,8 +8,6 @@ ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=RV64,RV64-V -; FIXME: We are over-aligning the stack on V, wasting stack space. - define ptr @scalar_stack_align16() nounwind { ; RV32-ZVE64-LABEL: scalar_stack_align16: ; RV32-ZVE64: # %bb.0: