Skip to content

Commit

Permalink
winch: Optimize calls
Browse files Browse the repository at this point in the history
This commit introduces several optimizations to speed up the compilation
of function calls:

* Keep track of previously resolved function signatures for local or
  imported callees to avoid computing the `ABISig` on every
  function call.
* Keep track of previously resolved type signatures for indirect calls
  to avoid computing the `ABISig` on every function call.
* Refactor `CallKnown` and `CallUnknown` instructions to make the
  `BoxCallInfo` fiel in the struction optional. Prior to this change,
  from Winch's perspective each call lowering involved a heap
  allocation, using the default values for `BoxCallInfo`, which in the
  end are not used by Winch.
* Switch Winch's internal `Stack` to use a `SmallVec` rather than
  a `Vec`. Many of the operations involving builtin function calls
  require inserting elements at arbitrary offsets in the stack and
  using a `SmallVec` makes this process more efficient.

With the changes mentioned above, I observed ~30% improvement in
compilation times for modules that are call-heavy.
  • Loading branch information
saulecabrera committed Feb 13, 2024
1 parent c7f748d commit 00477ff
Show file tree
Hide file tree
Showing 18 changed files with 334 additions and 288 deletions.
6 changes: 3 additions & 3 deletions cranelift/codegen/src/isa/x64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,17 +506,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Writable::from_reg(regs::rax()),
));
insts.push(Inst::CallKnown {
opcode: Opcode::Call,
dest: ExternalName::LibCall(LibCall::Probestack),
info: Box::new(CallInfo {
info: Some(Box::new(CallInfo {
// No need to include arg here: we are post-regalloc
// so no constraints will be seen anyway.
uses: smallvec![],
defs: smallvec![],
clobbers: PRegSet::empty(),
opcode: Opcode::Call,
callee_pop_size: 0,
callee_conv: CallConv::Probestack,
}),
})),
});
}

Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -525,10 +525,12 @@

;; Direct call: call simm32.
(CallKnown (dest ExternalName)
(opcode Opcode)
(info BoxCallInfo))

;; Indirect call: callq (reg mem)
(CallUnknown (dest RegMem)
(opcode Opcode)
(info BoxCallInfo))

;; Tail call to a direct destination.
Expand Down
32 changes: 14 additions & 18 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1609,11 +1609,7 @@ pub(crate) fn emit(
inst.emit(&[], sink, info, state);
}

Inst::CallKnown {
dest,
info: call_info,
..
} => {
Inst::CallKnown { dest, opcode, info } => {
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
}
Expand All @@ -1622,12 +1618,14 @@ pub(crate) fn emit(
// beginning of the immediate field.
emit_reloc(sink, Reloc::X86CallPCRel4, &dest, -4);
sink.put4(0);
if call_info.opcode.is_call() {
sink.add_call_site(call_info.opcode);
if opcode.is_call() {
sink.add_call_site(*opcode);
}

let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
if let Some(call_info) = info {
let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
}
}

Inst::ReturnCallKnown {
Expand Down Expand Up @@ -1683,11 +1681,7 @@ pub(crate) fn emit(
sink.add_call_site(ir::Opcode::ReturnCallIndirect);
}

Inst::CallUnknown {
dest,
info: call_info,
..
} => {
Inst::CallUnknown { dest, opcode, info } => {
let dest = dest.with_allocs(allocs);

let start_offset = sink.cur_offset();
Expand Down Expand Up @@ -1722,12 +1716,14 @@ pub(crate) fn emit(
if let Some(s) = state.take_stack_map() {
sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
}
if call_info.opcode.is_call() {
sink.add_call_site(call_info.opcode);
if opcode.is_call() {
sink.add_call_site(*opcode);
}

let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
if let Some(call_info) = info {
let callee_pop_size = i64::from(call_info.callee_pop_size);
state.adjust_virtual_sp_offset(-callee_pop_size);
}
}

Inst::Args { .. } => {}
Expand Down
62 changes: 32 additions & 30 deletions cranelift/codegen/src/isa/x64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ pub struct CallInfo {
pub defs: CallRetList,
/// Registers clobbered by this call, as per its calling convention.
pub clobbers: PRegSet,
/// The opcode of this call.
pub opcode: Opcode,
/// The number of bytes that the callee will pop from the stack for the
/// caller, if any. (Used for popping stack arguments with the `tail`
/// calling convention.)
Expand Down Expand Up @@ -556,14 +554,14 @@ impl Inst {
) -> Inst {
Inst::CallKnown {
dest,
info: Box::new(CallInfo {
opcode,
info: Some(Box::new(CallInfo {
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
}),
})),
}
}

Expand All @@ -579,14 +577,14 @@ impl Inst {
dest.assert_regclass_is(RegClass::Int);
Inst::CallUnknown {
dest,
info: Box::new(CallInfo {
opcode,
info: Some(Box::new(CallInfo {
uses,
defs,
clobbers,
opcode,
callee_pop_size,
callee_conv,
}),
})),
}
}

Expand Down Expand Up @@ -2348,37 +2346,41 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_early_def(*tmp);
}

Inst::CallKnown { dest, ref info, .. } => {
Inst::CallKnown { dest, info, .. } => {
// Probestack is special and is only inserted after
// regalloc, so we do not need to represent its ABI to the
// register allocator. Assert that we don't alter that
// arrangement.
debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
for d in &info.defs {
collector.reg_fixed_def(d.vreg, d.preg);
if let Some(info) = info {
debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack));
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
for d in &info.defs {
collector.reg_fixed_def(d.vreg, d.preg);
}
collector.reg_clobbers(info.clobbers);
}
collector.reg_clobbers(info.clobbers);
}

Inst::CallUnknown { ref info, dest, .. } => {
match dest {
RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => {
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
// This shouldn't be a fixed register constraint.
collector.reg_fixed_use(*reg, regs::r15())
Inst::CallUnknown { info, dest, .. } => {
if let Some(info) = info {
match dest {
RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => {
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
// This shouldn't be a fixed register constraint.
collector.reg_fixed_use(*reg, regs::r15())
}
_ => dest.get_operands(collector),
}
_ => dest.get_operands(collector),
}
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
for d in &info.defs {
collector.reg_fixed_def(d.vreg, d.preg);
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
for d in &info.defs {
collector.reg_fixed_def(d.vreg, d.preg);
}
collector.reg_clobbers(info.clobbers);
}
collector.reg_clobbers(info.clobbers);
}

Inst::ReturnCallKnown { callee, info } => {
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use regalloc2::PReg;
use std::boxed::Box;
use std::convert::TryFrom;

type BoxCallInfo = Box<CallInfo>;
type BoxCallInfo = Option<Box<CallInfo>>;
type BoxReturnCallInfo = Box<ReturnCallInfo>;
type VecArgPair = Vec<ArgPair>;

Expand Down
75 changes: 22 additions & 53 deletions winch/codegen/src/codegen/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,8 @@ use crate::{
masm::{CalleeKind, MacroAssembler, MemMoveDirection, OperandSize, SPOffset},
reg::Reg,
stack::Val,
CallingConvention,
};
use smallvec::SmallVec;
use std::borrow::Cow;
use wasmtime_environ::{PtrSize, VMOffsets, WasmValType};
use wasmtime_environ::{PtrSize, VMOffsets};

/// All the information needed to emit a function call.
#[derive(Copy, Clone)]
Expand All @@ -83,23 +80,19 @@ impl FnCall {
/// 4. Creates the stack space needed for the return area.
/// 5. Emits the call.
/// 6. Cleans up the stack space.
pub fn emit<M: MacroAssembler, P: PtrSize, R>(
pub fn emit<'a, M: MacroAssembler, P: PtrSize>(
masm: &mut M,
context: &mut CodeGenContext,
mut resolve: R,
) where
R: FnMut(&mut CodeGenContext) -> Callee,
{
let callee = resolve(context);
let ptr_type = ptr_type_from_ptr_size(context.vmoffsets.ptr.size());
let mut sig = Self::get_sig::<M>(&callee, ptr_type);
let kind = Self::map(&context.vmoffsets, &callee, sig.as_ref(), context, masm);
callee: Callee<'a>,
) {
let sig = callee.sig();
let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm);

context.spill(masm);
let ret_area = Self::make_ret_area(&sig, masm);
let arg_stack_space = sig.params_stack_size();
let reserved_stack = masm.call(arg_stack_space, |masm| {
Self::assign(sig.as_ref(), ret_area.as_ref(), context, masm);
Self::assign(sig, ret_area.as_ref(), context, masm);
kind
});

Expand All @@ -108,7 +101,7 @@ impl FnCall {
_ => {}
}

Self::cleanup(&mut sig, reserved_stack, ret_area, masm, context);
Self::cleanup(sig, reserved_stack, ret_area, masm, context);
}

/// Calculates the return area for the callee, if any.
Expand All @@ -123,30 +116,6 @@ impl FnCall {
})
}

/// Derive the [`ABISig`] for a particular [`Callee`].
fn get_sig<M: MacroAssembler>(callee: &Callee, ptr_type: WasmValType) -> Cow<'_, ABISig> {
match callee {
Callee::Builtin(info) => Cow::Borrowed(info.sig()),
Callee::Import(info) => {
let mut params: SmallVec<[WasmValType; 6]> =
SmallVec::with_capacity(info.ty.params().len() + 2);
params.extend_from_slice(&[ptr_type, ptr_type]);
params.extend_from_slice(info.ty.params());
Cow::Owned(<M::ABI as ABI>::sig_from(
&params,
info.ty.returns(),
&CallingConvention::Default,
))
}
Callee::Local(info) => {
Cow::Owned(<M::ABI as ABI>::sig(&info.ty, &CallingConvention::Default))
}
Callee::FuncRef(ty) => {
Cow::Owned(<M::ABI as ABI>::sig(&ty, &CallingConvention::Default))
}
}
}

/// Maps the given [`Callee`] to a [`CalleeKind`].
fn map<P: PtrSize, M: MacroAssembler>(
vmoffsets: &VMOffsets<P>,
Expand Down Expand Up @@ -221,7 +190,7 @@ impl FnCall {
let location = stack.len().checked_sub(sig.params.len() - 2).unwrap_or(0);
context.stack.insert_many(
location,
[
&[
TypedReg::new(ptr_type, callee_vmctx).into(),
TypedReg::new(ptr_type, caller_vmctx).into(),
],
Expand Down Expand Up @@ -303,7 +272,7 @@ impl FnCall {
/// Cleanup stack space, handle multiple results, and free registers after
/// emitting the call.
fn cleanup<M: MacroAssembler>(
sig: &mut Cow<'_, ABISig>,
sig: &ABISig,
reserved_space: u32,
ret_area: Option<RetArea>,
masm: &mut M,
Expand Down Expand Up @@ -340,22 +309,22 @@ impl FnCall {
// Free the bytes consumed by the call.
masm.free_stack(stack_consumed);

if let Some(area) = ret_area {
debug_assert!(!area.is_uninit());
let ret_area = ret_area.map(|area| {
if stack_consumed > 0 {
sig.to_mut()
.results
.set_ret_area(RetArea::sp(masm.sp_offset()));
// If there's a return area and stack space was consumed by the
// call, adjust the return area to be to the current stack
// pointer offset.
RetArea::sp(masm.sp_offset())
} else {
// If theres a return area, and no memory was adjusted
// (memmoved), the offsets should be equal.
// Else if no stack space was consumed by the call, simply use
// the previously calculated area.
debug_assert_eq!(area.unwrap_sp(), masm.sp_offset());
sig.to_mut().results.set_ret_area(area);
area
}
}

context.push_abi_results(&sig.results, masm, |results, _, _| {
results.ret_area().copied()
});

// In the case of [Callee], there's no need to set the [RetArea] of the
// signature, as it's only used here to push abi results.
context.push_abi_results(&sig.results, masm, |_, _, _| ret_area);
}
}
9 changes: 2 additions & 7 deletions winch/codegen/src/codegen/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use wasmtime_environ::{VMOffsets, WasmHeapType, WasmValType};
use super::ControlStackFrame;
use crate::{
abi::{ABIOperand, ABIResults, RetArea, ABI},
codegen::BuiltinFunctions,
frame::Frame,
isa::reg::RegClass,
masm::{MacroAssembler, OperandSize, RegImm, SPOffset, StackSlot},
Expand All @@ -27,7 +26,7 @@ use crate::{
/// generation process. The code generation context should
/// be generally used as the single entry point to access
/// the compound functionality provided by its elements.
pub(crate) struct CodeGenContext<'a, 'builtins: 'a> {
pub(crate) struct CodeGenContext<'a> {
/// The register allocator.
pub regalloc: RegAlloc,
/// The value stack.
Expand All @@ -36,27 +35,23 @@ pub(crate) struct CodeGenContext<'a, 'builtins: 'a> {
pub frame: Frame,
/// Reachability state.
pub reachable: bool,
/// The built-in functions available to the JIT code.
pub builtins: &'builtins mut BuiltinFunctions,
/// A reference to the VMOffsets.
pub vmoffsets: &'a VMOffsets<u8>,
}

impl<'a, 'builtins> CodeGenContext<'a, 'builtins> {
impl<'a> CodeGenContext<'a> {
/// Create a new code generation context.
pub fn new(
regalloc: RegAlloc,
stack: Stack,
frame: Frame,
builtins: &'builtins mut BuiltinFunctions,
vmoffsets: &'a VMOffsets<u8>,
) -> Self {
Self {
regalloc,
stack,
frame,
reachable: true,
builtins,
vmoffsets,
}
}
Expand Down
Loading

0 comments on commit 00477ff

Please sign in to comment.