diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 22824b77c37dd..b0c525ea8c299 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1102,16 +1102,25 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
     RVVStackAlign = std::max(RVVStackAlign, ObjectAlign);
   }
+  uint64_t StackSize = Offset;
+
+  // Multiply by vscale.
+  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
+    StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
+
   // Ensure the alignment of the RVV stack. Since we want the most-aligned
   // object right at the bottom (i.e., any padding at the top of the frame),
   // readjust all RVV objects down by the alignment padding.
-  uint64_t StackSize = Offset;
   if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
     StackSize += AlignmentPadding;
     for (int FI : ObjectsToAllocate)
       MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
   }
+  // Remove vscale.
+  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
+    StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
+
   return std::make_pair(StackSize, RVVStackAlign);
 }
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 899aad6ed7232..0c2b809c0be20 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -17,10 +17,10 @@ define void @_Z3foov() {
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
 ; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
 ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
 ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
@@ -83,8 +83,8 @@ define void @_Z3foov() {
 ; CHECK-NEXT: addi a0, a0, %lo(var_47)
 ; CHECK-NEXT: vsseg4e16.v v8, (a0)
 ; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
index 93fe66695b70e..225680e846bac 100644
--- a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
+++ b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
@@ -10,9 +10,10 @@ define riscv_vector_cc @test_vector_callee_cfi( @test_vector_callee_cfi( @test_vector_callee_cfi( @test_vector_callee_cfi( @access_fixed_and_vector_objects(ptr %val) {
 ; RV64IV-NEXT: addi sp, sp, -528
 ; RV64IV-NEXT: .cfi_def_cfa_offset 528
 ; RV64IV-NEXT: csrr a0, vlenb
-; RV64IV-NEXT: slli a0, a0, 1
 ; RV64IV-NEXT: sub sp, sp, a0
-; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 2 * vlenb
+; RV64IV-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x04, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 528 + 1 * vlenb
 ; RV64IV-NEXT: addi a0, sp, 8
 ; RV64IV-NEXT: vl1re64.v v8, (a0)
 ;
RV64IV-NEXT: addi a0, sp, 528 @@ -44,7 +43,6 @@ define @access_fixed_and_vector_objects(ptr %val) { ; RV64IV-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV64IV-NEXT: vadd.vv v8, v8, v9 ; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 ; RV64IV-NEXT: add sp, sp, a0 ; RV64IV-NEXT: addi sp, sp, 528 ; RV64IV-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index f976adcfe931c..5f0e1a9b9aa24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -38,12 +38,10 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -240 ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 undef renamable $v8, killed renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) ; CHECK-NEXT: $x10 = PseudoReadVLENB - ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = SUB $x8, killed $x10 ; CHECK-NEXT: $x10 = ADDI killed $x10, -2048 ; CHECK-NEXT: $x10 = ADDI killed $x10, -224 diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll index 1fe91c721f4dd..2e70c3395090e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll @@ -11,9 +11,10 @@ define void @test(ptr %addr) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrrs a1, vlenb, zero -; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 3 * vlenb ; CHECK-NEXT: csrrs a1, vlenb, zero ; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: vl1re64.v v8, (a2) @@ -28,7 +29,8 @@ define void @test(ptr %addr) { ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs1r.v v8, (a0) ; CHECK-NEXT: csrrs a0, vlenb, zero -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: jalr zero, 0(ra) diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 90794820ddd84..35e269b911902 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -7,34 +7,13 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL define void @lmul1() nounwind { -; NOZBA-LABEL: lmul1: -; NOZBA: # %bb.0: -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 1 -; NOZBA-NEXT: sub sp, sp, a0 -; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 1 -; NOZBA-NEXT: add sp, sp, a0 -; NOZBA-NEXT: ret -; -; ZBA-LABEL: lmul1: -; ZBA: # %bb.0: -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: slli a0, a0, 1 -; ZBA-NEXT: sub sp, sp, a0 -; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh1add sp, a0, sp -; ZBA-NEXT: ret -; -; 
NOMUL-LABEL: lmul1: -; NOMUL: # %bb.0: -; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 1 -; NOMUL-NEXT: sub sp, sp, a0 -; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 1 -; NOMUL-NEXT: add sp, sp, a0 -; NOMUL-NEXT: ret +; CHECK-LABEL: lmul1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %v = alloca ret void } @@ -192,29 +171,34 @@ define void @lmul2_and_1() nounwind { ; NOZBA-LABEL: lmul2_and_1: ; NOZBA: # %bb.0: ; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 ; NOZBA-NEXT: sub sp, sp, a0 ; NOZBA-NEXT: csrr a0, vlenb -; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: slli a1, a0, 1 +; NOZBA-NEXT: add a0, a1, a0 ; NOZBA-NEXT: add sp, sp, a0 ; NOZBA-NEXT: ret ; ; ZBA-LABEL: lmul2_and_1: ; ZBA: # %bb.0: ; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: slli a0, a0, 2 +; ZBA-NEXT: sh1add a0, a0, a0 ; ZBA-NEXT: sub sp, sp, a0 ; ZBA-NEXT: csrr a0, vlenb -; ZBA-NEXT: sh2add sp, a0, sp +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: add sp, sp, a0 ; ZBA-NEXT: ret ; ; NOMUL-LABEL: lmul2_and_1: ; NOMUL: # %bb.0: ; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: slli a1, a0, 1 +; NOMUL-NEXT: add a0, a1, a0 ; NOMUL-NEXT: sub sp, sp, a0 ; NOMUL-NEXT: csrr a0, vlenb -; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: slli a1, a0, 1 +; NOMUL-NEXT: add a0, a1, a0 ; NOMUL-NEXT: add sp, sp, a0 ; NOMUL-NEXT: ret %v1 = alloca @@ -223,63 +207,176 @@ define void @lmul2_and_1() nounwind { } define void @lmul4_and_1() nounwind { -; CHECK-LABEL: lmul4_and_1: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 48 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -48 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul4_and_1: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -48 +; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOZBA-NEXT: addi s0, sp, 48 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 6 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -32 +; NOZBA-NEXT: addi sp, s0, -48 +; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 48 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul4_and_1: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -48 +; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 48 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 1 +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -32 +; ZBA-NEXT: addi sp, s0, -48 +; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 48 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul4_and_1: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -48 +; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 48 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; 
NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -32 +; NOMUL-NEXT: addi sp, s0, -48 +; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 48 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul4_and_2() nounwind { -; CHECK-LABEL: lmul4_and_2: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 48 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -48 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul4_and_2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -48 +; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOZBA-NEXT: addi s0, sp, 48 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 6 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -32 +; NOZBA-NEXT: addi sp, s0, -48 +; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 48 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul4_and_2: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -48 +; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 48 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 1 +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -32 +; ZBA-NEXT: addi sp, s0, -48 +; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 48 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul4_and_2: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -48 +; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 48 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -32 +; NOMUL-NEXT: addi sp, s0, -48 +; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 48 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca ret void } define void @lmul4_and_2_x2_0() nounwind { -; CHECK-LABEL: lmul4_and_2_x2_0: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -48 -; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 48 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: addi sp, s0, -48 -; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 48 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul4_and_2_x2_0: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -48 +; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOZBA-NEXT: addi s0, sp, 48 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 14 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -32 +; NOZBA-NEXT: addi sp, s0, -48 
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 48 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul4_and_2_x2_0: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -48 +; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 48 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: li a1, 14 +; ZBA-NEXT: mul a0, a0, a1 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -32 +; ZBA-NEXT: addi sp, s0, -48 +; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 48 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul4_and_2_x2_0: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -48 +; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 48 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a1, a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -32 +; NOMUL-NEXT: addi sp, s0, -48 +; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 48 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca %v3 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll index 2177bbfe5b2a4..c1ce2e988fc51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll @@ -7,7 +7,6 @@ define @test_vector_std( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -15,7 +14,6 @@ define @test_vector_std( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index 600084632ce68..c4bc794b8aeb3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -83,7 +83,7 @@ body: | ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $x8, 0 ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -272 ; CHECK-NEXT: $x10 = frame-setup PseudoReadVLENB - ; CHECK-NEXT: $x11 = frame-setup ADDI $x0, 52 + ; CHECK-NEXT: $x11 = frame-setup ADDI $x0, 51 ; CHECK-NEXT: $x10 = frame-setup MUL killed $x10, killed $x11 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x10 ; CHECK-NEXT: $x2 = frame-setup ANDI $x2, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll index 727e03125176a..bdedc5f33c3a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll @@ -26,9 +26,8 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32-ZFBFMIN-NEXT: .cfi_offset ra, -4 ; RV32-ZFBFMIN-NEXT: csrr a1, vlenb -; RV32-ZFBFMIN-NEXT: slli a1, a1, 1 ; RV32-ZFBFMIN-NEXT: sub 
sp, sp, a1 -; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV32-ZFBFMIN-NEXT: addi a1, sp, 32 ; RV32-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-ZFBFMIN-NEXT: andi a0, a0, 3 @@ -43,7 +42,6 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV32-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFBFMIN-NEXT: csrr a0, vlenb -; RV32-ZFBFMIN-NEXT: slli a0, a0, 1 ; RV32-ZFBFMIN-NEXT: add sp, sp, a0 ; RV32-ZFBFMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-ZFBFMIN-NEXT: addi sp, sp, 48 @@ -56,9 +54,8 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-ZFBFMIN-NEXT: .cfi_offset ra, -8 ; RV64-ZFBFMIN-NEXT: csrr a1, vlenb -; RV64-ZFBFMIN-NEXT: slli a1, a1, 1 ; RV64-ZFBFMIN-NEXT: sub sp, sp, a1 -; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV64-ZFBFMIN-NEXT: addi a1, sp, 32 ; RV64-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-ZFBFMIN-NEXT: andi a0, a0, 3 @@ -73,7 +70,6 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) { ; RV64-ZFBFMIN-NEXT: lh a0, 0(a0) ; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFBFMIN-NEXT: csrr a0, vlenb -; RV64-ZFBFMIN-NEXT: slli a0, a0, 1 ; RV64-ZFBFMIN-NEXT: add sp, sp, a0 ; RV64-ZFBFMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-ZFBFMIN-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index bf2eb3ff0261a..53059a4f28d42 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -206,9 +206,8 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32-ZFHMIN-NEXT: .cfi_offset ra, -4 ; RV32-ZFHMIN-NEXT: csrr a1, vlenb -; RV32-ZFHMIN-NEXT: slli a1, a1, 1 ; RV32-ZFHMIN-NEXT: sub sp, sp, a1 -; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV32-ZFHMIN-NEXT: addi a1, sp, 32 ; RV32-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-ZFHMIN-NEXT: andi a0, a0, 3 @@ -223,7 +222,6 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV32-ZFHMIN-NEXT: lh a0, 0(a0) ; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV32-ZFHMIN-NEXT: csrr a0, vlenb -; RV32-ZFHMIN-NEXT: slli a0, a0, 1 ; RV32-ZFHMIN-NEXT: add sp, sp, a0 ; RV32-ZFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-ZFHMIN-NEXT: addi sp, sp, 48 @@ -236,9 +234,8 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64-ZFHMIN-NEXT: .cfi_offset ra, -8 ; RV64-ZFHMIN-NEXT: csrr a1, vlenb -; RV64-ZFHMIN-NEXT: slli a1, a1, 1 ; RV64-ZFHMIN-NEXT: sub sp, sp, a1 -; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 
0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; RV64-ZFHMIN-NEXT: addi a1, sp, 32 ; RV64-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-ZFHMIN-NEXT: andi a0, a0, 3 @@ -253,7 +250,6 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) { ; RV64-ZFHMIN-NEXT: lh a0, 0(a0) ; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0 ; RV64-ZFHMIN-NEXT: csrr a0, vlenb -; RV64-ZFHMIN-NEXT: slli a0, a0, 1 ; RV64-ZFHMIN-NEXT: add sp, sp, a0 ; RV64-ZFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-ZFHMIN-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index eff56e408d6d5..5911e8248f299 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -641,10 +641,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 66 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 6 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 66 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc1, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 65 * vlenb ; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v16, (a2) @@ -1065,8 +1065,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 66 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 6 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index d52cbb54c4b2d..805b371f1e3d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -104,9 +104,10 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 @@ -167,7 +168,8 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 @@ -210,9 +212,10 @@ define <4 x i64> 
@llrint_v4i64_v4f32(<4 x float> %x) { ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 4 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 3 * vlenb ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add a0, sp, a0 @@ -273,7 +276,8 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll index 62a479bdedf64..b953cf1f5bed8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmaccbf16.ll @@ -33,9 +33,8 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN32-NEXT: .cfi_offset s0, -8 ; ZVFBMIN32-NEXT: .cfi_offset fs0, -16 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: sub sp, sp, a0 -; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb +; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb ; ZVFBMIN32-NEXT: fmv.s fs0, fa0 ; ZVFBMIN32-NEXT: addi a0, sp, 16 ; ZVFBMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -58,7 +57,6 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN32-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: add sp, sp, a0 ; ZVFBMIN32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; ZVFBMIN32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -77,9 +75,8 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN64-NEXT: .cfi_offset s0, -16 ; ZVFBMIN64-NEXT: .cfi_offset fs0, -24 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: sub sp, sp, a0 -; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; ZVFBMIN64-NEXT: fmv.s fs0, fa0 ; ZVFBMIN64-NEXT: addi a0, sp, 32 ; ZVFBMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -104,7 +101,6 @@ define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x b ; ZVFBMIN64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN64-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: add sp, sp, a0 ; ZVFBMIN64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; ZVFBMIN64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -144,9 +140,8 @@ define <1 x float> 
@vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN32-NEXT: .cfi_offset ra, -4 ; ZVFBMIN32-NEXT: .cfi_offset fs0, -16 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: sub sp, sp, a0 -; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; ZVFBMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; ZVFBMIN32-NEXT: fmv.s fs0, fa0 ; ZVFBMIN32-NEXT: addi a0, sp, 32 ; ZVFBMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -167,7 +162,6 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN32-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN32-NEXT: csrr a0, vlenb -; ZVFBMIN32-NEXT: slli a0, a0, 1 ; ZVFBMIN32-NEXT: add sp, sp, a0 ; ZVFBMIN32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; ZVFBMIN32-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload @@ -183,9 +177,8 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN64-NEXT: .cfi_offset ra, -8 ; ZVFBMIN64-NEXT: .cfi_offset fs0, -16 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: sub sp, sp, a0 -; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; ZVFBMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb ; ZVFBMIN64-NEXT: fmv.s fs0, fa0 ; ZVFBMIN64-NEXT: addi a0, sp, 32 ; ZVFBMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -208,7 +201,6 @@ define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> ; ZVFBMIN64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; ZVFBMIN64-NEXT: vfmacc.vv v8, v10, v9 ; ZVFBMIN64-NEXT: csrr a0, vlenb -; ZVFBMIN64-NEXT: slli a0, a0, 1 ; ZVFBMIN64-NEXT: add sp, sp, a0 ; ZVFBMIN64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; ZVFBMIN64-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index 0c180cd148b81..c055039876191 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1844,10 +1844,10 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -1888,8 +1888,8 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi 
sp, sp, 16 ; RV64-NEXT: ret @@ -1931,10 +1931,10 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -1975,8 +1975,8 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index 320db35770cb8..dd01e1c1ee66d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -294,10 +294,10 @@ define @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -482,7 +483,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -593,9 +595,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -643,7 +646,8 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, 
e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -764,9 +768,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -815,7 +820,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2250,9 +2256,8 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2319,7 +2324,6 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2380,9 +2384,8 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2404,7 +2407,6 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2489,9 +2491,8 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; 
CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2540,7 +2541,6 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2648,9 +2648,8 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2717,7 +2716,6 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2778,9 +2776,8 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2802,7 +2799,6 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2887,9 +2883,8 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; 
CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2938,7 +2933,6 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -3767,9 +3761,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -3817,7 +3812,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclip.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3926,9 +3922,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -3976,7 +3973,8 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -4096,9 +4094,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 3 * vlenb ; CHECK-V-NEXT: lhu s0, 0(a0) ; CHECK-V-NEXT: lhu s1, 8(a0) ; CHECK-V-NEXT: lhu s2, 16(a0) @@ -4147,7 +4146,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnclipu.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, 
a1, a0 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -5568,9 +5568,8 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5640,7 +5639,6 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -5699,9 +5697,8 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5726,7 +5723,6 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -5799,9 +5795,8 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5839,7 +5834,6 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -5948,9 +5942,8 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 
0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -6020,7 +6013,6 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -6079,9 +6071,8 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -6106,7 +6097,6 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -6179,9 +6169,8 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 1 * vlenb ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -6219,7 +6208,6 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: vmv.s.x v9, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll index 0e102d98c79cf..ccfe94ecad286 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll @@ -87,7 +87,6 @@ define @just_call( %0) nounwind { ; CHECK-NEXT: addi sp, sp, -48 ; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: addi a0, sp, 32 ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -95,7 +94,6 @@ define @just_call( %0) nounwind { ; CHECK-NEXT: addi a0, sp, 32 ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi 
sp, sp, 48 @@ -106,7 +104,6 @@ define @just_call( %0) nounwind { ; UNOPT-NEXT: addi sp, sp, -48 ; UNOPT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; UNOPT-NEXT: csrr a0, vlenb -; UNOPT-NEXT: slli a0, a0, 1 ; UNOPT-NEXT: sub sp, sp, a0 ; UNOPT-NEXT: addi a0, sp, 32 ; UNOPT-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -114,7 +111,6 @@ define @just_call( %0) nounwind { ; UNOPT-NEXT: addi a0, sp, 32 ; UNOPT-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; UNOPT-NEXT: csrr a0, vlenb -; UNOPT-NEXT: slli a0, a0, 1 ; UNOPT-NEXT: add sp, sp, a0 ; UNOPT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; UNOPT-NEXT: addi sp, sp, 48 @@ -130,7 +126,6 @@ define @before_call1( %0, @before_call1( %0, @before_call1( %0, @before_call1( %0, @before_call2( %0, @before_call2( %0, @before_call2( %0, @before_call2( %0, @after_call1( %0, @after_call1( %0, @after_call1( %0, @after_call1( %0, @after_call2( %0, @after_call2( %0, @after_call2( %0, @after_call2( %0, @spill_lmul_mf2( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -18,7 +17,6 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,7 +25,6 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -35,7 +32,6 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -51,7 +47,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -60,7 +55,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -69,7 +63,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -77,7 +70,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll index c12df627b49d6..b34952b64f09e 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -11,7 +11,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma @@ -24,7 +23,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -88,7 +86,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma @@ -101,7 +98,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll index 30075c2dad516..1e6ff0baddaef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -9,7 +9,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: sub sp, sp, a0 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -18,7 +17,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,7 +25,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: sub sp, sp, a0 ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -35,7 +32,6 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll index afb4b1560728c..361adb55ef12f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -11,7 +11,6 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, tu, ma @@ -24,7 +23,6 @@ define 
@spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -88,7 +86,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 ; SPILL-O0-NEXT: # implicit-def: $v8_v9 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, tu, ma @@ -101,7 +98,6 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll index 409ef50aa53c8..fcb5f07664aa5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll +++ b/llvm/test/CodeGen/RISCV/rvv/scalar-stack-align.ll @@ -1,49 +1,77 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefixes=RV32,RV32-ZVE64 ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: | FileCheck %s --check-prefixes=RV64,RV64-ZVE64 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: | FileCheck %s --check-prefixes=RV32,RV32-V ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=RV64 - -; FIXME: We are over-aligning the stack on V, wasting stack space. 
+; RUN: | FileCheck %s --check-prefixes=RV64,RV64-V define ptr @scalar_stack_align16() nounwind { -; RV32-LABEL: scalar_stack_align16: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: addi a0, sp, 32 -; RV32-NEXT: call extern -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: add sp, sp, a1 -; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret +; RV32-ZVE64-LABEL: scalar_stack_align16: +; RV32-ZVE64: # %bb.0: +; RV32-ZVE64-NEXT: addi sp, sp, -48 +; RV32-ZVE64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-ZVE64-NEXT: csrr a0, vlenb +; RV32-ZVE64-NEXT: slli a0, a0, 1 +; RV32-ZVE64-NEXT: sub sp, sp, a0 +; RV32-ZVE64-NEXT: addi a0, sp, 32 +; RV32-ZVE64-NEXT: call extern +; RV32-ZVE64-NEXT: addi a0, sp, 16 +; RV32-ZVE64-NEXT: csrr a1, vlenb +; RV32-ZVE64-NEXT: slli a1, a1, 1 +; RV32-ZVE64-NEXT: add sp, sp, a1 +; RV32-ZVE64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-ZVE64-NEXT: addi sp, sp, 48 +; RV32-ZVE64-NEXT: ret +; +; RV64-ZVE64-LABEL: scalar_stack_align16: +; RV64-ZVE64: # %bb.0: +; RV64-ZVE64-NEXT: addi sp, sp, -48 +; RV64-ZVE64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64-ZVE64-NEXT: csrr a0, vlenb +; RV64-ZVE64-NEXT: slli a0, a0, 1 +; RV64-ZVE64-NEXT: sub sp, sp, a0 +; RV64-ZVE64-NEXT: addi a0, sp, 32 +; RV64-ZVE64-NEXT: call extern +; RV64-ZVE64-NEXT: addi a0, sp, 16 +; RV64-ZVE64-NEXT: csrr a1, vlenb +; RV64-ZVE64-NEXT: slli a1, a1, 1 +; RV64-ZVE64-NEXT: add sp, sp, a1 +; RV64-ZVE64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64-ZVE64-NEXT: addi sp, sp, 48 +; RV64-ZVE64-NEXT: ret +; +; RV32-V-LABEL: scalar_stack_align16: +; RV32-V: # %bb.0: +; RV32-V-NEXT: addi sp, sp, -48 +; RV32-V-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32-V-NEXT: csrr a0, vlenb +; RV32-V-NEXT: sub sp, sp, a0 +; RV32-V-NEXT: addi a0, sp, 32 +; RV32-V-NEXT: call extern +; RV32-V-NEXT: addi a0, sp, 16 +; RV32-V-NEXT: csrr a1, vlenb +; RV32-V-NEXT: add sp, sp, a1 +; RV32-V-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32-V-NEXT: addi sp, sp, 48 +; RV32-V-NEXT: ret ; -; RV64-LABEL: scalar_stack_align16: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -48 -; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: sub sp, sp, a0 -; RV64-NEXT: addi a0, sp, 32 -; RV64-NEXT: call extern -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 -; RV64-NEXT: add sp, sp, a1 -; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 48 -; RV64-NEXT: ret +; RV64-V-LABEL: scalar_stack_align16: +; RV64-V: # %bb.0: +; RV64-V-NEXT: addi sp, sp, -48 +; RV64-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64-V-NEXT: csrr a0, vlenb +; RV64-V-NEXT: sub sp, sp, a0 +; RV64-V-NEXT: addi a0, sp, 32 +; RV64-V-NEXT: call extern +; RV64-V-NEXT: addi a0, sp, 16 +; RV64-V-NEXT: csrr a1, vlenb +; RV64-V-NEXT: add sp, sp, a1 +; RV64-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64-V-NEXT: addi sp, sp, 48 +; RV64-V-NEXT: ret %a = alloca %c = alloca i64, align 16 call void @extern(ptr %a) @@ -51,3 +79,6 @@ define ptr @scalar_stack_align16() nounwind { } declare void @extern(ptr) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll index f966835622a9f..ffe6ff8a91abd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll @@ -8,9 +8,8 @@ define i64 @i64( %v, i1 %c) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: andi a0, a0, 1 @@ -29,7 +28,6 @@ define i64 @i64( %v, i1 %c) { ; RV32-NEXT: li a1, 0 ; RV32-NEXT: .LBB0_3: # %falsebb ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 1 ; RV32-NEXT: add sp, sp, a2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -39,9 +37,8 @@ define i64 @i64( %v, i1 %c) { ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: andi a0, a0, 1 @@ -52,7 +49,6 @@ define i64 @i64( %v, i1 %c) { ; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64-NEXT: .LBB0_2: # %falsebb ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: add sp, sp, a1 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -71,9 +67,8 @@ define i32 @i32( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -84,7 +79,6 @@ define i32 @i32( %v, i1 %c) { ; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: .LBB1_2: # %falsebb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -103,9 +97,8 @@ define i16 @i16( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -116,7 +109,6 @@ define i16 @i16( %v, i1 %c) { ; CHECK-NEXT: lh a0, 16(sp) # 8-byte Folded Reload ; 
CHECK-NEXT: .LBB2_2: # %falsebb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -135,9 +127,8 @@ define i8 @i8( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -148,7 +139,6 @@ define i8 @i8( %v, i1 %c) { ; CHECK-NEXT: lb a0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: .LBB3_2: # %falsebb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -167,9 +157,8 @@ define double @f64( %v, i1 %c) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: andi a0, a0, 1 @@ -183,7 +172,6 @@ define double @f64( %v, i1 %c) { ; RV32-NEXT: fcvt.d.w fa0, zero ; RV32-NEXT: .LBB4_3: # %falsebb ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -193,9 +181,8 @@ define double @f64( %v, i1 %c) { ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: andi a0, a0, 1 @@ -209,7 +196,6 @@ define double @f64( %v, i1 %c) { ; RV64-NEXT: fmv.d.x fa0, zero ; RV64-NEXT: .LBB4_3: # %falsebb ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -228,9 +214,8 @@ define float @f32( %v, i1 %c) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: andi a0, a0, 1 @@ -244,7 +229,6 @@ define float @f32( %v, i1 %c) { ; CHECK-NEXT: fmv.w.x fa0, zero ; CHECK-NEXT: .LBB5_3: # %falsebb ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index b3de904d20622..4c298ab2b5e6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -508,12 +508,10 @@ define @vfadd_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -577,10 +575,8 @@ define @vfadd_vf_nxv32bf16( %va, bf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1301,12 +1297,10 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -1370,10 +1364,8 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index aa39fe5b5ec85..0fe6c5dec4264 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -470,12 +470,10 @@ define @vfdiv_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: vmv8r.v v24, v8 ; 
CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -539,10 +537,8 @@ define @vfdiv_vf_nxv32bf16( %va, bf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1213,12 +1209,10 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -1282,10 +1276,8 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index baecb7bb7d248..f0c74d064016a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -642,14 +642,13 @@ define @vfma_vv_nxv32bf16( %va, @vfma_vv_nxv32bf16( %va, @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a2, a2, a1 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a1 @@ -1038,9 +1035,8 @@ define @vfma_vf_nxv32bf16( %va, bfl ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 @@ -1059,14 +1055,13 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a2, a2, a1 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a1 @@ -1200,9 +1195,8 @@ define @vfma_vf_nxv32bf16_commute( ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a1, a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 @@ -2247,14 +2241,13 @@ define @vfma_vv_nxv32f16( %va, @vfma_vv_nxv32f16( %va, @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a2, a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 @@ -2656,9 +2647,8 @@ define @vfma_vf_nxv32f16( %va, half %b, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a1, a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add a0, a0, a1 @@ -2683,14 +2673,13 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 2 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a2, a2, a1 ; ZVFHMIN-NEXT: slli a1, a1, 2 ; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v24, a1 @@ -2824,9 +2813,8 @@ define @vfma_vf_nxv32f16_commute( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 ; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 2 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a1, a1, a0 ; ZVFHMIN-NEXT: slli a0, a0, 2 ; ZVFHMIN-NEXT: add a0, a0, a1 @@ -8663,14 +8651,13 @@ define @vfmsub_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16( %va, @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_commute( @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, 
a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -673,10 +671,8 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 449130e59876f..dd57b65b50f4f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -470,12 +470,10 @@ define @vfsub_vf_nxv32bf16( %va, bf ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: fmv.x.h a1, fa0 ; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -539,10 +537,8 @@ define @vfsub_vf_nxv32bf16( %va, bf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1213,12 +1209,10 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 1 -; ZVFHMIN-NEXT: mv a2, a1 -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: add a1, a1, a2 +; ZVFHMIN-NEXT: slli a2, a1, 4 +; ZVFHMIN-NEXT: add a1, a2, a1 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; ZVFHMIN-NEXT: vmv8r.v v24, v8 ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma @@ -1282,10 +1276,8 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, a0, a1 +; ZVFHMIN-NEXT: slli a1, a0, 4 +; ZVFHMIN-NEXT: add a0, a1, a0 
; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 4d715c7031000..0028f3035c273 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2469,10 +2469,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb ; RV64-NEXT: vl4re16.v v24, (a1) ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill @@ -2509,8 +2509,8 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll index a869b433a4952..72f25268109a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll @@ -75,7 +75,6 @@ define @test3( %0, %1, @test3( %0, %1, @test3( %0, %1, @test3( %0, %1,