diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index d13949c1ddbf..05f3f3a45ffe 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -855,7 +855,7 @@ fn compute_clobber_size(clobbers: &[Writable]) -> u32 { align_to(clobbered_size, 16) } -pub(crate) const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() +const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty() .with(px_reg(1)) .with(px_reg(5)) .with(px_reg(6)) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 044368a5bc16..90186fb9fbfb 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -47,14 +47,14 @@ pub enum EmitVState { #[derive(Default, Clone, Debug)] pub struct EmitState { /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. - pub(crate) stack_map: Option, + stack_map: Option, /// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and /// optimized away at compiletime. See [cranelift_control]. - pub(crate) ctrl_plane: ControlPlane, + ctrl_plane: ControlPlane, /// Vector State /// Controls the current state of the vector unit at the emission point. - pub(crate) vstate: EmitVState, - pub(crate) frame_layout: FrameLayout, + vstate: EmitVState, + frame_layout: FrameLayout, } impl EmitState { @@ -242,14 +242,21 @@ impl MachInstEmit for Inst { self.emit_uncompressed(sink, emit_info, state, &mut start_off); } - let end_off = sink.cur_offset(); - assert!( - (end_off - start_off) <= Inst::worst_case_size(), - "Inst:{:?} length:{} worst_case_size:{}", + // We exclude br_table and return call from these checks since they emit + // their own islands, and thus are allowed to exceed the worst case size. + if !matches!( self, - end_off - start_off, - Inst::worst_case_size() - ); + Inst::BrTable { .. } | Inst::ReturnCall { .. } | Inst::ReturnCallInd { .. 
} + ) { + let end_off = sink.cur_offset(); + assert!( + (end_off - start_off) <= Inst::worst_case_size(), + "Inst:{:?} length:{} worst_case_size:{}", + self, + end_off - start_off, + Inst::worst_case_size() + ); + } } fn pretty_print_inst(&self, state: &mut Self::State) -> String { @@ -2608,6 +2615,40 @@ fn emit_return_call_common_sequence( emit_info: &EmitInfo, state: &mut EmitState, info: &ReturnCallInfo, +) { + // The return call sequence can potentially emit a lot of instructions (up to 634 bytes!) + // So let's emit an island here if we need it. + // + // It is difficult to calculate exactly how many instructions are going to be emitted, so + // we calculate it by emitting it into a disposable buffer, and then checking how many bytes + // were actually emitted. + let mut buffer = MachBuffer::new(); + let mut fake_emit_state = state.clone(); + + return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info); + + // Finalize the buffer and get the number of bytes emitted. + let buffer = buffer.finish(&Default::default(), &mut Default::default()); + let length = buffer.data().len() as u32; + + // And now emit the island inline with this instruction. + if sink.island_needed(length) { + let jump_around_label = sink.get_label(); + Inst::gen_jump(jump_around_label).emit(sink, emit_info, state); + sink.emit_island(length + 4, &mut state.ctrl_plane); + sink.bind_label(jump_around_label, &mut state.ctrl_plane); + } + + // Now that we're done, emit the *actual* return sequence. + return_call_emit_impl(sink, emit_info, state, info); +} + +/// This should not be called directly; instead, prefer to call [emit_return_call_common_sequence]. 
+fn return_call_emit_impl( sink: &mut MachBuffer, emit_info: &EmitInfo, state: &mut EmitState, info: &ReturnCallInfo, ) { let sp_to_fp_offset = { let frame_layout = state.frame_layout(); diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index ffcc78f33aa5..693658ba4bd8 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -1,6 +1,6 @@ #[allow(unused)] use crate::ir::LibCall; -use crate::isa::riscv64::{abi::DEFAULT_CLOBBERS, inst::*}; +use crate::isa::riscv64::inst::*; use std::borrow::Cow; fn fa7() -> Reg { @@ -2198,45 +2198,13 @@ fn riscv64_worst_case_instruction_size() { }), ); - candidates.push(Inst::ReturnCallInd { - callee: a0(), - info: Box::new(ReturnCallInfo { - opcode: Opcode::ReturnCallIndirect, - new_stack_arg_size: 64, - uses: DEFAULT_CLOBBERS - .into_iter() - .map(|reg| CallArgPair { - vreg: reg.into(), - preg: reg.into(), - }) - .collect(), - }), - }); + // Return Call Indirect and BrTable are the largest instructions possible. However they + // emit their own island, so we don't account for them here. let mut max: (u32, MInst) = (0, Inst::Nop0); for i in candidates { let mut buffer = MachBuffer::new(); - let mut emit_state = EmitState { - // This frame layout is important to ensure that the ReturnCallIndirect - // instruction in this test, becomes as large as practically possible. 
- frame_layout: FrameLayout { - tail_args_size: 64, - setup_area_size: 8192, - clobber_size: DEFAULT_CLOBBERS - .into_iter() - .filter(|r| r.class() != RegClass::Vector) - .map(|_| 8) - .sum(), - fixed_frame_storage_size: 1 << 16, - clobbered_callee_saves: DEFAULT_CLOBBERS - .into_iter() - .filter(|r| r.class() != RegClass::Vector) - .map(|r| Writable::from_reg(r.into())) - .collect(), - ..Default::default() - }, - ..Default::default() - }; + let mut emit_state = Default::default(); i.emit(&mut buffer, &emit_info, &mut emit_state); let buffer = buffer.finish(&Default::default(), &mut Default::default()); let length = buffer.data().len() as u32; diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 6518192a6396..35c0138a1dea 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -845,7 +845,7 @@ impl MachInst for Inst { fn worst_case_size() -> CodeOffset { // Our worst case size is determined by the riscv64_worst_case_instruction_size test - 636 + 76 } fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { diff --git a/cranelift/filetests/filetests/isa/riscv64/return-call.clif b/cranelift/filetests/filetests/isa/riscv64/return-call.clif index 5a575cb1ae2f..e119025b92a4 100644 --- a/cranelift/filetests/filetests/isa/riscv64/return-call.clif +++ b/cranelift/filetests/filetests/isa/riscv64/return-call.clif @@ -693,11 +693,8 @@ block2: ; addi s8, zero, 0x7d ; addi s7, zero, 0x82 ; addi s6, zero, 0x87 -; bnez a0, 8 -; j 0xc -; auipc t6, 0 -; jalr zero, t6, 0xb4 -; block2: ; offset 0xd8 +; bnez a0, 0xb0 +; block2: ; offset 0xcc ; addi a0, zero, 0x8c ; sd a2, 0x90(sp) ; sd a1, 0x98(sp) @@ -741,7 +738,7 @@ block2: ; ld s0, 0x80(sp) ; addi sp, sp, 0x90 ; jr t0 -; block3: ; offset 0x184 +; block3: ; offset 0x178 ; ld a0, 0x10(sp) ; sd a2, 0xa0(sp) ; sd a1, 0xa8(sp)