diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 407363c1db0c..095ef13f1335 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -718,10 +718,20 @@ impl ABIMachineSpec for Riscv64MachineDeps { // Number of probes that we need to perform let probe_count = align_to(frame_size, guard_size) / guard_size; + // Must be a caller-saved register that is not an argument. + let tmp = match call_conv { + isa::CallConv::Tail => Writable::from_reg(x_reg(1)), + _ => Writable::from_reg(x_reg(28)), // t3 + }; + if probe_count <= PROBE_MAX_UNROLL { - Self::gen_probestack_unroll(insts, guard_size, probe_count) + Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count) } else { - Self::gen_probestack_loop(insts, call_conv, guard_size, probe_count) + insts.push(Inst::StackProbeLoop { + guard_size, + probe_count, + tmp, + }); } } } @@ -1052,15 +1062,32 @@ fn create_reg_enviroment() -> MachineEnv { } impl Riscv64MachineDeps { - fn gen_probestack_unroll(insts: &mut SmallInstVec, guard_size: u32, probe_count: u32) { + fn gen_probestack_unroll( + insts: &mut SmallInstVec, + tmp: Writable, + guard_size: u32, + probe_count: u32, + ) { // When manually unrolling adjust the stack pointer and then write a zero // to the stack at that offset. // // We do this because valgrind expects us to never write beyond the stack // pointer and associated redzone. // See: https://github.com/bytecodealliance/wasmtime/issues/7454 + + // Store the adjust amount in a register upfront, so we don't have to + // reload it for each probe. It's worth loading this as a negative and + // using an `add` instruction since we have compressed versions of `add` + // but not the `sub` instruction. + insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64)); + for _ in 0..probe_count { - insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32))); + insts.push(Inst::AluRRR { + alu_op: AluOPRRR::Add, + rd: writable_stack_reg(), + rs1: stack_reg(), + rs2: tmp.to_reg(), + }); insts.push(Self::gen_store_stack( StackAMode::SPOffset(0, I8), @@ -1072,22 +1099,4 @@ impl Riscv64MachineDeps { // Restore the stack pointer to its original value insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32)); } - - fn gen_probestack_loop( - insts: &mut SmallInstVec, - call_conv: isa::CallConv, - guard_size: u32, - probe_count: u32, - ) { - // Must be a caller-saved register that is not an argument. - let tmp = match call_conv { - isa::CallConv::Tail => Writable::from_reg(x_reg(1)), - _ => Writable::from_reg(x_reg(28)), // t3 - }; - insts.push(Inst::StackProbeLoop { - guard_size, - probe_count, - tmp, - }); - } } diff --git a/cranelift/filetests/filetests/isa/riscv64/c-inline-probestack.clif b/cranelift/filetests/filetests/isa/riscv64/c-inline-probestack.clif index 1af5f12dcc32..6d94c2f81d9d 100644 --- a/cranelift/filetests/filetests/isa/riscv64/c-inline-probestack.clif +++ b/cranelift/filetests/filetests/isa/riscv64/c-inline-probestack.clif @@ -61,14 +61,12 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; lui t6,-1 -; add sp,t6,sp +; lui t3,-1 +; add sp,sp,t3 ; sw zero,0(sp) -; lui t6,-1 -; add sp,t6,sp +; add sp,sp,t3 ; sw zero,0(sp) -; lui t6,-1 -; add sp,t6,sp +; add sp,sp,t3 ; sw zero,0(sp) ; lui t6,3 ; add sp,t6,sp @@ -89,20 +87,18 @@ block0: ; c.sdsp ra, 8(sp) ; c.sdsp s0, 0(sp) ; c.mv s0, sp -; c.lui t6, 0xfffff -; add sp, t6, sp +; c.lui t3, 0xfffff +; c.add sp, t3 ; c.swsp zero, 0(sp) -; c.lui t6, 0xfffff -; add sp, t6, sp +; c.add sp, t3 ; c.swsp zero, 0(sp) -; c.lui t6, 0xfffff -; add sp, t6, sp +; c.add sp, t3 ; c.swsp zero, 0(sp) ; c.lui t6, 3 ; add sp, t6, sp ; c.lui t6, 0xffffd ; add sp, t6, sp -; block1: ; offset 0x2c +; block1: ; offset 0x22 ; c.mv a0, sp ; c.lui t6, 3 ; add sp, t6, sp diff --git a/cranelift/filetests/filetests/isa/riscv64/inline-probestack.clif b/cranelift/filetests/filetests/isa/riscv64/inline-probestack.clif index f85ac24bb65d..6b8177668b2b 100644 --- a/cranelift/filetests/filetests/isa/riscv64/inline-probestack.clif +++ b/cranelift/filetests/filetests/isa/riscv64/inline-probestack.clif @@ -61,14 +61,12 @@ block0: ; sd ra,8(sp) ; sd fp,0(sp) ; mv fp,sp -; lui t6,-1 -; add sp,t6,sp +; lui t3,-1 +; add sp,sp,t3 ; sw zero,0(sp) -; lui t6,-1 -; add sp,t6,sp +; add sp,sp,t3 ; sw zero,0(sp) -; lui t6,-1 -; add sp,t6,sp +; add sp,sp,t3 ; sw zero,0(sp) ; lui t6,3 ; add sp,t6,sp @@ -89,20 +87,18 @@ block0: ; sd ra, 8(sp) ; sd s0, 0(sp) ; mv s0, sp -; lui t6, 0xfffff -; add sp, t6, sp +; lui t3, 0xfffff +; add sp, sp, t3 ; sw zero, 0(sp) -; lui t6, 0xfffff -; add sp, t6, sp +; add sp, sp, t3 ; sw zero, 0(sp) -; lui t6, 0xfffff -; add sp, t6, sp +; add sp, sp, t3 ; sw zero, 0(sp) ; lui t6, 3 ; add sp, t6, sp ; lui t6, 0xffffd ; add sp, t6, sp -; block1: ; offset 0x44 +; block1: ; offset 0x3c ; mv a0, sp ; lui t6, 3 ; add sp, t6, sp