Skip to content

Commit

Permalink
riscv64: Avoid reloading probe amount when unrolling
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 committed Nov 3, 2023
1 parent 3df41c7 commit f24e4fc
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 48 deletions.
53 changes: 31 additions & 22 deletions cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,10 +718,20 @@ impl ABIMachineSpec for Riscv64MachineDeps {
// Number of probes that we need to perform
let probe_count = align_to(frame_size, guard_size) / guard_size;

// Must be a caller-saved register that is not an argument.
let tmp = match call_conv {
isa::CallConv::Tail => Writable::from_reg(x_reg(1)),
_ => Writable::from_reg(x_reg(28)), // t3
};

if probe_count <= PROBE_MAX_UNROLL {
Self::gen_probestack_unroll(insts, guard_size, probe_count)
Self::gen_probestack_unroll(insts, tmp, guard_size, probe_count)
} else {
Self::gen_probestack_loop(insts, call_conv, guard_size, probe_count)
insts.push(Inst::StackProbeLoop {
guard_size,
probe_count,
tmp,
});
}
}
}
Expand Down Expand Up @@ -1052,15 +1062,32 @@ fn create_reg_enviroment() -> MachineEnv {
}

impl Riscv64MachineDeps {
fn gen_probestack_unroll(insts: &mut SmallInstVec<Inst>, guard_size: u32, probe_count: u32) {
fn gen_probestack_unroll(
insts: &mut SmallInstVec<Inst>,
tmp: Writable<Reg>,
guard_size: u32,
probe_count: u32,
) {
// When manually unrolling adjust the stack pointer and then write a zero
// to the stack at that offset.
//
// We do this because valgrind expects us to never write beyond the stack
// pointer and associated redzone.
// See: https://github.com/bytecodealliance/wasmtime/issues/7454

// Store the adjustment amount in a register upfront, so we don't have to
// reload it for each probe. It's worth loading this as a negative value and
// using an `add` instruction, since `add` has a compressed encoding
// (`c.add`) that can operate on `sp`, whereas the compressed `sub`
// (`c.sub`) is restricted to registers x8-x15.
insts.extend(Inst::load_constant_u64(tmp, (-(guard_size as i64)) as u64));

for _ in 0..probe_count {
insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));
insts.push(Inst::AluRRR {
alu_op: AluOPRRR::Add,
rd: writable_stack_reg(),
rs1: stack_reg(),
rs2: tmp.to_reg(),
});

insts.push(Self::gen_store_stack(
StackAMode::SPOffset(0, I8),
Expand All @@ -1072,22 +1099,4 @@ impl Riscv64MachineDeps {
// Restore the stack pointer to its original value
insts.extend(Self::gen_sp_reg_adjust((guard_size * probe_count) as i32));
}

fn gen_probestack_loop(
insts: &mut SmallInstVec<Inst>,
call_conv: isa::CallConv,
guard_size: u32,
probe_count: u32,
) {
// Must be a caller-saved register that is not an argument.
let tmp = match call_conv {
isa::CallConv::Tail => Writable::from_reg(x_reg(1)),
_ => Writable::from_reg(x_reg(28)), // t3
};
insts.push(Inst::StackProbeLoop {
guard_size,
probe_count,
tmp,
});
}
}
22 changes: 9 additions & 13 deletions cranelift/filetests/filetests/isa/riscv64/c-inline-probestack.clif
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,12 @@ block0:
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui t6,-1
; add sp,t6,sp
; lui t3,-1
; add sp,sp,t3
; sw zero,0(sp)
; lui t6,-1
; add sp,t6,sp
; add sp,sp,t3
; sw zero,0(sp)
; lui t6,-1
; add sp,t6,sp
; add sp,sp,t3
; sw zero,0(sp)
; lui t6,3
; add sp,t6,sp
Expand All @@ -89,20 +87,18 @@ block0:
; c.sdsp ra, 8(sp)
; c.sdsp s0, 0(sp)
; c.mv s0, sp
; c.lui t6, 0xfffff
; add sp, t6, sp
; c.lui t3, 0xfffff
; c.add sp, t3
; c.swsp zero, 0(sp)
; c.lui t6, 0xfffff
; add sp, t6, sp
; c.add sp, t3
; c.swsp zero, 0(sp)
; c.lui t6, 0xfffff
; add sp, t6, sp
; c.add sp, t3
; c.swsp zero, 0(sp)
; c.lui t6, 3
; add sp, t6, sp
; c.lui t6, 0xffffd
; add sp, t6, sp
; block1: ; offset 0x2c
; block1: ; offset 0x22
; c.mv a0, sp
; c.lui t6, 3
; add sp, t6, sp
Expand Down
22 changes: 9 additions & 13 deletions cranelift/filetests/filetests/isa/riscv64/inline-probestack.clif
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,12 @@ block0:
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; lui t6,-1
; add sp,t6,sp
; lui t3,-1
; add sp,sp,t3
; sw zero,0(sp)
; lui t6,-1
; add sp,t6,sp
; add sp,sp,t3
; sw zero,0(sp)
; lui t6,-1
; add sp,t6,sp
; add sp,sp,t3
; sw zero,0(sp)
; lui t6,3
; add sp,t6,sp
Expand All @@ -89,20 +87,18 @@ block0:
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; lui t6, 0xfffff
; add sp, t6, sp
; lui t3, 0xfffff
; add sp, sp, t3
; sw zero, 0(sp)
; lui t6, 0xfffff
; add sp, t6, sp
; add sp, sp, t3
; sw zero, 0(sp)
; lui t6, 0xfffff
; add sp, t6, sp
; add sp, sp, t3
; sw zero, 0(sp)
; lui t6, 3
; add sp, t6, sp
; lui t6, 0xffffd
; add sp, t6, sp
; block1: ; offset 0x44
; block1: ; offset 0x3c
; mv a0, sp
; lui t6, 3
; add sp, t6, sp
Expand Down

0 comments on commit f24e4fc

Please sign in to comment.