From 00477ff20c7257996635089811290c5d9edc56b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Wed, 24 Jan 2024 11:11:58 -0500 Subject: [PATCH] winch: Optimize calls This commit introduces several optimizations to speed up the compilation of function calls: * Keep track of previously resolved function signatures for local or imported callees to avoid computing the `ABISig` on every function call. * Keep track of previously resolved type signatures for indirect calls to avoid computing the `ABISig` on every function call. * Refactor `CallKnown` and `CallUnknown` instructions to make the `BoxCallInfo` field in the instruction optional. Prior to this change, from Winch's perspective each call lowering involved a heap allocation, using the default values for `BoxCallInfo`, which in the end are not used by Winch. * Switch Winch's internal `Stack` to use a `SmallVec` rather than a `Vec`. Many of the operations involving builtin function calls require inserting elements at arbitrary offsets in the stack and using a `SmallVec` makes this process more efficient. With the changes mentioned above, I observed ~30% improvement in compilation times for modules that are call-heavy. 
--- cranelift/codegen/src/isa/x64/abi.rs | 6 +- cranelift/codegen/src/isa/x64/inst.isle | 2 + cranelift/codegen/src/isa/x64/inst/emit.rs | 32 ++- cranelift/codegen/src/isa/x64/inst/mod.rs | 62 +++--- cranelift/codegen/src/isa/x64/lower/isle.rs | 2 +- winch/codegen/src/codegen/call.rs | 75 +++----- winch/codegen/src/codegen/context.rs | 9 +- winch/codegen/src/codegen/env.rs | 93 ++++++--- winch/codegen/src/codegen/mod.rs | 14 +- winch/codegen/src/isa/aarch64/masm.rs | 6 +- winch/codegen/src/isa/aarch64/mod.rs | 4 +- winch/codegen/src/isa/x64/asm.rs | 35 +--- winch/codegen/src/isa/x64/masm.rs | 42 +--- winch/codegen/src/isa/x64/mod.rs | 4 +- winch/codegen/src/masm.rs | 11 +- winch/codegen/src/regalloc.rs | 2 +- winch/codegen/src/stack.rs | 20 +- winch/codegen/src/visitor.rs | 203 +++++++++++++------- 18 files changed, 334 insertions(+), 288 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index d7df15c614d4..b276e448c4a0 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -506,17 +506,17 @@ impl ABIMachineSpec for X64ABIMachineSpec { Writable::from_reg(regs::rax()), )); insts.push(Inst::CallKnown { + opcode: Opcode::Call, dest: ExternalName::LibCall(LibCall::Probestack), - info: Box::new(CallInfo { + info: Some(Box::new(CallInfo { // No need to include arg here: we are post-regalloc // so no constraints will be seen anyway. uses: smallvec![], defs: smallvec![], clobbers: PRegSet::empty(), - opcode: Opcode::Call, callee_pop_size: 0, callee_conv: CallConv::Probestack, - }), + })), }); } diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 3812d459ce73..f07251c8bb0c 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -525,10 +525,12 @@ ;; Direct call: call simm32. 
(CallKnown (dest ExternalName) + (opcode Opcode) (info BoxCallInfo)) ;; Indirect call: callq (reg mem) (CallUnknown (dest RegMem) + (opcode Opcode) (info BoxCallInfo)) ;; Tail call to a direct destination. diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 546f961da636..4e93b5d2497d 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1609,11 +1609,7 @@ pub(crate) fn emit( inst.emit(&[], sink, info, state); } - Inst::CallKnown { - dest, - info: call_info, - .. - } => { + Inst::CallKnown { dest, opcode, info } => { if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s); } @@ -1622,12 +1618,14 @@ pub(crate) fn emit( // beginning of the immediate field. emit_reloc(sink, Reloc::X86CallPCRel4, &dest, -4); sink.put4(0); - if call_info.opcode.is_call() { - sink.add_call_site(call_info.opcode); + if opcode.is_call() { + sink.add_call_site(*opcode); } - let callee_pop_size = i64::from(call_info.callee_pop_size); - state.adjust_virtual_sp_offset(-callee_pop_size); + if let Some(call_info) = info { + let callee_pop_size = i64::from(call_info.callee_pop_size); + state.adjust_virtual_sp_offset(-callee_pop_size); + } } Inst::ReturnCallKnown { @@ -1683,11 +1681,7 @@ pub(crate) fn emit( sink.add_call_site(ir::Opcode::ReturnCallIndirect); } - Inst::CallUnknown { - dest, - info: call_info, - .. 
- } => { + Inst::CallUnknown { dest, opcode, info } => { let dest = dest.with_allocs(allocs); let start_offset = sink.cur_offset(); @@ -1722,12 +1716,14 @@ pub(crate) fn emit( if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s); } - if call_info.opcode.is_call() { - sink.add_call_site(call_info.opcode); + if opcode.is_call() { + sink.add_call_site(*opcode); } - let callee_pop_size = i64::from(call_info.callee_pop_size); - state.adjust_virtual_sp_offset(-callee_pop_size); + if let Some(call_info) = info { + let callee_pop_size = i64::from(call_info.callee_pop_size); + state.adjust_virtual_sp_offset(-callee_pop_size); + } } Inst::Args { .. } => {} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index ea2ff83b3c83..bbf26722a427 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -41,8 +41,6 @@ pub struct CallInfo { pub defs: CallRetList, /// Registers clobbered by this call, as per its calling convention. pub clobbers: PRegSet, - /// The opcode of this call. - pub opcode: Opcode, /// The number of bytes that the callee will pop from the stack for the /// caller, if any. (Used for popping stack arguments with the `tail` /// calling convention.) @@ -556,14 +554,14 @@ impl Inst { ) -> Inst { Inst::CallKnown { dest, - info: Box::new(CallInfo { + opcode, + info: Some(Box::new(CallInfo { uses, defs, clobbers, - opcode, callee_pop_size, callee_conv, - }), + })), } } @@ -579,14 +577,14 @@ impl Inst { dest.assert_regclass_is(RegClass::Int); Inst::CallUnknown { dest, - info: Box::new(CallInfo { + opcode, + info: Some(Box::new(CallInfo { uses, defs, clobbers, - opcode, callee_pop_size, callee_conv, - }), + })), } } @@ -2348,37 +2346,41 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_early_def(*tmp); } - Inst::CallKnown { dest, ref info, .. } => { + Inst::CallKnown { dest, info, .. 
} => { // Probestack is special and is only inserted after // regalloc, so we do not need to represent its ABI to the // register allocator. Assert that we don't alter that // arrangement. - debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack)); - for u in &info.uses { - collector.reg_fixed_use(u.vreg, u.preg); - } - for d in &info.defs { - collector.reg_fixed_def(d.vreg, d.preg); + if let Some(info) = info { + debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack)); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); } - collector.reg_clobbers(info.clobbers); } - Inst::CallUnknown { ref info, dest, .. } => { - match dest { - RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => { - // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): - // This shouldn't be a fixed register constraint. - collector.reg_fixed_use(*reg, regs::r15()) + Inst::CallUnknown { info, dest, .. } => { + if let Some(info) = info { + match dest { + RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => { + // TODO(https://github.com/bytecodealliance/regalloc2/issues/145): + // This shouldn't be a fixed register constraint. 
+ collector.reg_fixed_use(*reg, regs::r15()) + } + _ => dest.get_operands(collector), } - _ => dest.get_operands(collector), - } - for u in &info.uses { - collector.reg_fixed_use(u.vreg, u.preg); - } - for d in &info.defs { - collector.reg_fixed_def(d.vreg, d.preg); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } + collector.reg_clobbers(info.clobbers); } - collector.reg_clobbers(info.clobbers); } Inst::ReturnCallKnown { callee, info } => { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 474484c71512..94e3e12871bf 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -33,7 +33,7 @@ use regalloc2::PReg; use std::boxed::Box; use std::convert::TryFrom; -type BoxCallInfo = Box; +type BoxCallInfo = Option>; type BoxReturnCallInfo = Box; type VecArgPair = Vec; diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index 8ae5d89a6a61..9e3817f74682 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -65,11 +65,8 @@ use crate::{ masm::{CalleeKind, MacroAssembler, MemMoveDirection, OperandSize, SPOffset}, reg::Reg, stack::Val, - CallingConvention, }; -use smallvec::SmallVec; -use std::borrow::Cow; -use wasmtime_environ::{PtrSize, VMOffsets, WasmValType}; +use wasmtime_environ::{PtrSize, VMOffsets}; /// All the information needed to emit a function call. #[derive(Copy, Clone)] @@ -83,23 +80,19 @@ impl FnCall { /// 4. Creates the stack space needed for the return area. /// 5. Emits the call. /// 6. Cleans up the stack space. 
- pub fn emit( + pub fn emit<'a, M: MacroAssembler, P: PtrSize>( masm: &mut M, context: &mut CodeGenContext, - mut resolve: R, - ) where - R: FnMut(&mut CodeGenContext) -> Callee, - { - let callee = resolve(context); - let ptr_type = ptr_type_from_ptr_size(context.vmoffsets.ptr.size()); - let mut sig = Self::get_sig::(&callee, ptr_type); - let kind = Self::map(&context.vmoffsets, &callee, sig.as_ref(), context, masm); + callee: Callee<'a>, + ) { + let sig = callee.sig(); + let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm); context.spill(masm); let ret_area = Self::make_ret_area(&sig, masm); let arg_stack_space = sig.params_stack_size(); let reserved_stack = masm.call(arg_stack_space, |masm| { - Self::assign(sig.as_ref(), ret_area.as_ref(), context, masm); + Self::assign(sig, ret_area.as_ref(), context, masm); kind }); @@ -108,7 +101,7 @@ impl FnCall { _ => {} } - Self::cleanup(&mut sig, reserved_stack, ret_area, masm, context); + Self::cleanup(sig, reserved_stack, ret_area, masm, context); } /// Calculates the return area for the callee, if any. @@ -123,30 +116,6 @@ impl FnCall { }) } - /// Derive the [`ABISig`] for a particular [`Callee`]. - fn get_sig(callee: &Callee, ptr_type: WasmValType) -> Cow<'_, ABISig> { - match callee { - Callee::Builtin(info) => Cow::Borrowed(info.sig()), - Callee::Import(info) => { - let mut params: SmallVec<[WasmValType; 6]> = - SmallVec::with_capacity(info.ty.params().len() + 2); - params.extend_from_slice(&[ptr_type, ptr_type]); - params.extend_from_slice(info.ty.params()); - Cow::Owned(::sig_from( - ¶ms, - info.ty.returns(), - &CallingConvention::Default, - )) - } - Callee::Local(info) => { - Cow::Owned(::sig(&info.ty, &CallingConvention::Default)) - } - Callee::FuncRef(ty) => { - Cow::Owned(::sig(&ty, &CallingConvention::Default)) - } - } - } - /// Maps the given [`Callee`] to a [`CalleeKind`]. fn map( vmoffsets: &VMOffsets

, @@ -221,7 +190,7 @@ impl FnCall { let location = stack.len().checked_sub(sig.params.len() - 2).unwrap_or(0); context.stack.insert_many( location, - [ + &[ TypedReg::new(ptr_type, callee_vmctx).into(), TypedReg::new(ptr_type, caller_vmctx).into(), ], @@ -303,7 +272,7 @@ impl FnCall { /// Cleanup stack space, handle multiple results, and free registers after /// emitting the call. fn cleanup( - sig: &mut Cow<'_, ABISig>, + sig: &ABISig, reserved_space: u32, ret_area: Option, masm: &mut M, @@ -340,22 +309,22 @@ impl FnCall { // Free the bytes consumed by the call. masm.free_stack(stack_consumed); - if let Some(area) = ret_area { - debug_assert!(!area.is_uninit()); + let ret_area = ret_area.map(|area| { if stack_consumed > 0 { - sig.to_mut() - .results - .set_ret_area(RetArea::sp(masm.sp_offset())); + // If there's a return area and stack space was consumed by the + // call, adjust the return area to be to the current stack + // pointer offset. + RetArea::sp(masm.sp_offset()) } else { - // If theres a return area, and no memory was adjusted - // (memmoved), the offsets should be equal. + // Else if no stack space was consumed by the call, simply use + // the previously calculated area. debug_assert_eq!(area.unwrap_sp(), masm.sp_offset()); - sig.to_mut().results.set_ret_area(area); + area } - } - - context.push_abi_results(&sig.results, masm, |results, _, _| { - results.ret_area().copied() }); + + // In the case of [Callee], there's no need to set the [RetArea] of the + // signature, as it's only used here to push abi results. 
+ context.push_abi_results(&sig.results, masm, |_, _, _| ret_area); } } diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index 7d62780670e6..542c1c246c5b 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -3,7 +3,6 @@ use wasmtime_environ::{VMOffsets, WasmHeapType, WasmValType}; use super::ControlStackFrame; use crate::{ abi::{ABIOperand, ABIResults, RetArea, ABI}, - codegen::BuiltinFunctions, frame::Frame, isa::reg::RegClass, masm::{MacroAssembler, OperandSize, RegImm, SPOffset, StackSlot}, @@ -27,7 +26,7 @@ use crate::{ /// generation process. The code generation context should /// be generally used as the single entry point to access /// the compound functionality provided by its elements. -pub(crate) struct CodeGenContext<'a, 'builtins: 'a> { +pub(crate) struct CodeGenContext<'a> { /// The register allocator. pub regalloc: RegAlloc, /// The value stack. @@ -36,19 +35,16 @@ pub(crate) struct CodeGenContext<'a, 'builtins: 'a> { pub frame: Frame, /// Reachability state. pub reachable: bool, - /// The built-in functions available to the JIT code. - pub builtins: &'builtins mut BuiltinFunctions, /// A reference to the VMOffsets. pub vmoffsets: &'a VMOffsets, } -impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { +impl<'a> CodeGenContext<'a> { /// Create a new code generation context. 
pub fn new( regalloc: RegAlloc, stack: Stack, frame: Frame, - builtins: &'builtins mut BuiltinFunctions, vmoffsets: &'a VMOffsets, ) -> Self { Self { @@ -56,7 +52,6 @@ impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { stack, frame, reachable: true, - builtins, vmoffsets, } } diff --git a/winch/codegen/src/codegen/env.rs b/winch/codegen/src/codegen/env.rs index 3c1bdf2c0521..58f5f151fa9d 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -1,7 +1,9 @@ use crate::{ - codegen::{control, BlockSig, BuiltinFunction, OperandSize}, - isa::TargetIsa, + abi::{ABISig, ABI}, + codegen::{control, BlockSig, BuiltinFunction, BuiltinFunctions, OperandSize}, + isa::{CallingConvention, TargetIsa}, }; +use smallvec::SmallVec; use std::collections::{ hash_map::Entry::{Occupied, Vacant}, HashMap, @@ -10,7 +12,7 @@ use wasmparser::BlockType; use wasmtime_environ::{ FuncIndex, GlobalIndex, MemoryIndex, MemoryPlan, MemoryStyle, ModuleTranslation, ModuleTypesBuilder, PtrSize, TableIndex, TablePlan, TypeConvert, TypeIndex, VMOffsets, - WasmFuncType, WasmHeapType, WasmValType, WASM_PAGE_SIZE, + WasmHeapType, WasmValType, WASM_PAGE_SIZE, }; /// Table metadata. @@ -74,23 +76,34 @@ pub struct HeapData { /// It categorizes how the callee should be treated /// when performing the call. #[derive(Clone)] -pub enum Callee { +pub(crate) enum Callee<'a> { /// Locally defined function. - Local(CalleeInfo), + Local(&'a CalleeInfo), /// Imported function. - Import(CalleeInfo), + Import(&'a CalleeInfo), /// Function reference. - FuncRef(WasmFuncType), + FuncRef(&'a ABISig), /// A built-in function. Builtin(BuiltinFunction), } +impl<'a> Callee<'a> { + /// Returns the [ABISig] of the [Callee]. + pub(crate) fn sig(&'a self) -> &'a ABISig { + match self { + Self::Local(info) | Self::Import(info) => &info.sig, + Self::FuncRef(sig) => sig, + Self::Builtin(b) => b.sig(), + } + } +} + /// Metadata about a function callee. 
Used by the code generation to /// emit function calls to local or imported functions. #[derive(Clone)] pub struct CalleeInfo { - /// The function type. - pub ty: WasmFuncType, + /// The function's ABI signature. + pub sig: ABISig, /// The callee index in the WebAssembly function index space. pub index: FuncIndex, } @@ -106,10 +119,20 @@ pub struct FuncEnv<'a, 'translation: 'a, 'data: 'translation, P: PtrSize> { pub translation: &'translation ModuleTranslation<'data>, /// The module's function types. pub types: &'translation ModuleTypesBuilder, + /// The built-in functions available to the JIT code. + pub builtins: &'translation mut BuiltinFunctions, /// Track resolved table information. resolved_tables: HashMap, /// Track resolved heap information. resolved_heaps: HashMap, + /// A map from [FunctionIndex] to [CalleeInfo], to keep track of the resolved + /// function callees. + resolved_callees: HashMap, + /// A map from [TypeIndex] to [ABISig], to keep track of the resolved + /// indirect function signatures. + resolved_sigs: HashMap, + /// Pointer size represented as a WebAssembly type. + ptr_type: WasmValType, /// Whether or not to enable Spectre mitigation on heap bounds checks. heap_access_spectre_mitigation: bool, } @@ -126,6 +149,7 @@ impl<'a, 'translation, 'data, P: PtrSize> FuncEnv<'a, 'translation, 'data, P> { vmoffsets: &'a VMOffsets

, translation: &'translation ModuleTranslation<'data>, types: &'translation ModuleTypesBuilder, + builtins: &'translation mut BuiltinFunctions, isa: &dyn TargetIsa, ) -> Self { Self { @@ -134,40 +158,63 @@ impl<'a, 'translation, 'data, P: PtrSize> FuncEnv<'a, 'translation, 'data, P> { types, resolved_tables: HashMap::new(), resolved_heaps: HashMap::new(), + resolved_callees: HashMap::new(), + resolved_sigs: HashMap::new(), + ptr_type: ptr_type_from_ptr_size(vmoffsets.ptr.size()), heap_access_spectre_mitigation: isa.flags().enable_heap_access_spectre_mitigation(), + builtins, } } /// Derive the [`WasmType`] from the pointer size. pub(crate) fn ptr_type(&self) -> WasmValType { - ptr_type_from_ptr_size(self.ptr_size()) - } - - /// Returns the pointer size for the target ISA. - fn ptr_size(&self) -> u8 { - self.vmoffsets.ptr.size() + self.ptr_type } /// Resolves a [`Callee::FuncRef`] from a type index. - pub fn funcref(&self, idx: TypeIndex) -> Callee { - let sig_index = self.translation.module.types[idx].unwrap_function(); - let ty = self.types[sig_index].clone(); - Callee::FuncRef(ty) + pub(crate) fn funcref(&mut self, idx: TypeIndex) -> Callee + where + A: ABI, + { + let val = || { + let sig_index = self.translation.module.types[idx].unwrap_function(); + let ty = &self.types[sig_index]; + let sig = ::sig(ty, &CallingConvention::Default); + sig + }; + Callee::FuncRef(self.resolved_sigs.entry(idx).or_insert_with(val)) } /// Resolves a function [`Callee`] from an index. 
- pub fn callee_from_index(&self, idx: FuncIndex) -> Callee { + pub(crate) fn callee_from_index(&mut self, idx: FuncIndex) -> Callee + where + A: ABI, + { + let ptr = self.ptr_type(); let types = &self.translation.get_types(); let ty = types[types.core_function_at(idx.as_u32())].unwrap_func(); let ty = self.convert_func_type(ty); let import = self.translation.module.is_imported_function(idx); + let val = || { + let info = if import { + let mut params: SmallVec<[WasmValType; 6]> = + SmallVec::with_capacity(ty.params().len() + 2); + params.extend_from_slice(&[ptr, ptr]); + params.extend_from_slice(ty.params()); + let sig = ::sig_from(¶ms, ty.returns(), &CallingConvention::Default); + CalleeInfo { sig, index: idx } + } else { + let sig = ::sig(&ty, &CallingConvention::Default); + CalleeInfo { sig, index: idx } + }; - let info = CalleeInfo { ty, index: idx }; + info + }; if import { - Callee::Import(info) + Callee::Import(self.resolved_callees.entry(idx).or_insert_with(val)) } else { - Callee::Local(info) + Callee::Local(self.resolved_callees.entry(idx).or_insert_with(val)) } } diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index 210179bc146e..4aac8b6e12db 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -37,7 +37,7 @@ where pub sig: ABISig, /// The code generation context. - pub context: CodeGenContext<'a, 'translation>, + pub context: CodeGenContext<'a>, /// A reference to the function compilation environment. 
pub env: FuncEnv<'a, 'translation, 'data, M::Ptr>, @@ -57,7 +57,7 @@ where { pub fn new( masm: &'a mut M, - context: CodeGenContext<'a, 'translation>, + context: CodeGenContext<'a>, env: FuncEnv<'a, 'translation, 'data, M::Ptr>, sig: ABISig, ) -> Self { @@ -368,7 +368,7 @@ where let table_data = self.env.resolve_table_data(table_index); let ptr_type = self.env.ptr_type(); let builtin = self - .context + .env .builtins .table_get_lazy_init_func_ref::(); @@ -416,9 +416,11 @@ where // This is safe since the FnCall::emit call below, will ensure // that the result register is placed on the value stack. self.context.free_reg(elem_value); - FnCall::emit::(self.masm, &mut self.context, |_| { - Callee::Builtin(builtin.clone()) - }); + FnCall::emit::( + self.masm, + &mut self.context, + Callee::Builtin(builtin.clone()), + ); // We know the signature of the libcall in this case, so we assert that there's // one element in the stack and that it's the ABI signature's result register. diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index c10f8f1006a4..01fa6fd27012 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1,7 +1,7 @@ use super::{abi::Aarch64ABI, address::Address, asm::Assembler, regs}; use crate::{ abi::{self, local::LocalSlot}, - codegen::{ptr_type_from_ptr_size, CodeGenContext, HeapData, TableData}, + codegen::{ptr_type_from_ptr_size, CodeGenContext, FuncEnv, HeapData, TableData}, isa::reg::Reg, masm::{ CalleeKind, DivKind, ExtendKind, FloatCmpKind, Imm as I, IntCmpKind, @@ -328,11 +328,13 @@ impl Masm for MacroAssembler { todo!() } - fn float_round( + fn float_round, &mut CodeGenContext, &mut Self)>( &mut self, _mode: RoundingMode, + _env: &mut FuncEnv, _context: &mut CodeGenContext, _size: OperandSize, + _fallback: F, ) { todo!(); } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index f66215107f52..c4e1924547e3 100644 --- 
a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -98,7 +98,7 @@ impl TargetIsa for Aarch64 { let stack = Stack::new(); let abi_sig = abi::Aarch64ABI::sig(sig, &CallingConvention::Default); - let env = FuncEnv::new(&vmoffsets, translation, types, self); + let env = FuncEnv::new(&vmoffsets, translation, types, builtins, self); let defined_locals = DefinedLocals::new::(&env, &mut body, validator)?; let frame = Frame::new::(&abi_sig, &defined_locals)?; let gpr = RegBitSet::int( @@ -109,7 +109,7 @@ impl TargetIsa for Aarch64 { // TODO: Add floating point bitmask let fpr = RegBitSet::float(0, 0, usize::try_from(MAX_FPR).unwrap()); let regalloc = RegAlloc::from(gpr, fpr); - let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets); + let codegen_context = CodeGenContext::new(regalloc, stack, frame, &vmoffsets); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index b03c63783d3b..752feb621de2 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -18,16 +18,15 @@ use cranelift_codegen::{ ShiftKind as CraneliftShiftKind, SseOpcode, SyntheticAmode, WritableGpr, WritableXmm, Xmm, XmmMem, XmmMemAligned, CC, }, - settings as x64_settings, CallInfo, EmitInfo, EmitState, Inst, + settings as x64_settings, EmitInfo, EmitState, Inst, }, - CallConv, }, settings, Final, MachBuffer, MachBufferFinalized, MachInstEmit, MachInstEmitState, MachLabel, VCodeConstantData, VCodeConstants, Writable, }; use super::address::Address; -use smallvec::{smallvec, SmallVec}; +use smallvec::SmallVec; // Conversions between winch-codegen x64 types and cranelift-codegen x64 types. 
@@ -1246,14 +1245,8 @@ impl Assembler { pub fn call_with_reg(&mut self, callee: Reg) { self.emit(Inst::CallUnknown { dest: RegMem::reg(callee.into()), - info: Box::new(CallInfo { - uses: smallvec![], - defs: smallvec![], - clobbers: Default::default(), - opcode: Opcode::Call, - callee_pop_size: 0, - callee_conv: CallConv::SystemV, - }), + opcode: Opcode::Call, + info: None, }); } @@ -1262,14 +1255,8 @@ impl Assembler { let dest = ExternalName::user(UserExternalNameRef::new(index as usize)); self.emit(Inst::CallKnown { dest, - info: Box::new(CallInfo { - uses: smallvec![], - defs: smallvec![], - clobbers: Default::default(), - opcode: Opcode::Call, - callee_pop_size: 0, - callee_conv: CallConv::SystemV, - }), + opcode: Opcode::Call, + info: None, }); } @@ -1278,14 +1265,8 @@ impl Assembler { let dest = ExternalName::LibCall(lib); self.emit(Inst::CallKnown { dest, - info: Box::new(CallInfo { - uses: smallvec![], - defs: smallvec![], - clobbers: Default::default(), - opcode: Opcode::Call, - callee_pop_size: 0, - callee_conv: CallConv::SystemV, - }), + opcode: Opcode::Call, + info: None, }); } diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 433dfb651045..9e95ba848c59 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -16,7 +16,7 @@ use crate::{ }; use crate::{ abi::{self, align_to, calculate_frame_adjustment, LocalSlot}, - codegen::{ptr_type_from_ptr_size, Callee, CodeGenContext, FnCall, HeapData, TableData}, + codegen::{ptr_type_from_ptr_size, CodeGenContext, FuncEnv, HeapData, TableData}, stack::Val, }; use crate::{ @@ -591,42 +591,20 @@ impl Masm for MacroAssembler { self.asm.xmm_and_rr(scratch_xmm, dst, size); } - fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize) { + fn float_round, &mut CodeGenContext, &mut Self)>( + &mut self, + mode: RoundingMode, + env: &mut FuncEnv, + context: &mut CodeGenContext, + size: OperandSize, + mut fallback: F, 
+ ) { if self.flags.has_sse41() { let src = context.pop_to_reg(self, None); self.asm.xmm_rounds_rr(src.into(), src.into(), mode, size); context.stack.push(src.into()); } else { - FnCall::emit::(self, context, |context| { - let b = match (&mode, size) { - (RoundingMode::Up, OperandSize::S32) => { - context.builtins.ceil_f32::<::ABI>() - } - (RoundingMode::Up, OperandSize::S64) => { - context.builtins.ceil_f64::<::ABI>() - } - (RoundingMode::Down, OperandSize::S32) => { - context.builtins.floor_f32::<::ABI>() - } - (RoundingMode::Down, OperandSize::S64) => { - context.builtins.floor_f64::<::ABI>() - } - (RoundingMode::Nearest, OperandSize::S32) => { - context.builtins.nearest_f32::<::ABI>() - } - (RoundingMode::Nearest, OperandSize::S64) => { - context.builtins.nearest_f64::<::ABI>() - } - (RoundingMode::Zero, OperandSize::S32) => { - context.builtins.trunc_f32::<::ABI>() - } - (RoundingMode::Zero, OperandSize::S64) => { - context.builtins.trunc_f64::<::ABI>() - } - (_, _) => unreachable!(), - }; - Callee::Builtin(b) - }) + fallback(env, context, self); } } diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 59ecc31e2314..f63c51d5abb0 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -107,7 +107,7 @@ impl TargetIsa for X64 { let stack = Stack::new(); let abi_sig = abi::X64ABI::sig(sig, &CallingConvention::Default); - let env = FuncEnv::new(&vmoffsets, translation, types, self); + let env = FuncEnv::new(&vmoffsets, translation, types, builtins, self); let defined_locals = DefinedLocals::new::(&env, &mut body, validator)?; let frame = Frame::new::(&abi_sig, &defined_locals)?; let gpr = RegBitSet::int( @@ -122,7 +122,7 @@ impl TargetIsa for X64 { ); let regalloc = RegAlloc::from(gpr, fpr); - let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets); + let codegen_context = CodeGenContext::new(regalloc, stack, frame, &vmoffsets); let mut codegen = CodeGen::new(&mut 
masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index b3dd603f0c7f..877d33d9df90 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1,5 +1,5 @@ use crate::abi::{self, align_to, LocalSlot}; -use crate::codegen::{CodeGenContext, HeapData, TableData}; +use crate::codegen::{CodeGenContext, FuncEnv, HeapData, TableData}; use crate::isa::reg::Reg; use cranelift_codegen::{ ir::{Endianness, LibCall, MemFlags}, @@ -603,7 +603,14 @@ pub(crate) trait MacroAssembler { fn float_neg(&mut self, dst: Reg, size: OperandSize); /// Perform a floating point floor operation. - fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize); + fn float_round, &mut CodeGenContext, &mut Self)>( + &mut self, + mode: RoundingMode, + env: &mut FuncEnv, + context: &mut CodeGenContext, + size: OperandSize, + fallback: F, + ); /// Perform a floating point square root operation. fn float_sqrt(&mut self, dst: Reg, src: Reg, size: OperandSize); diff --git a/winch/codegen/src/regalloc.rs b/winch/codegen/src/regalloc.rs index 2bcb8db3063a..578fea6b5792 100644 --- a/winch/codegen/src/regalloc.rs +++ b/winch/codegen/src/regalloc.rs @@ -54,7 +54,7 @@ impl RegAlloc { spill(self); self.regset .reg(named) - .expect(&format!("register {:?} to be available", named)) + .unwrap_or_else(|| panic!("Exepected register {:?} to be available", named)) }) } diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index 5e3cde00b274..0e464c75c1f4 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -1,4 +1,5 @@ use crate::{isa::reg::Reg, masm::StackSlot}; +use smallvec::SmallVec; use wasmparser::{Ieee32, Ieee64}; use wasmtime_environ::WasmValType; @@ -260,7 +261,8 @@ impl Val { /// The shadow stack used for compilation. #[derive(Default, Debug)] pub(crate) struct Stack { - inner: Vec, + // NB: The 64 is chosen arbitrarily. 
We can adjust as we see fit. + inner: SmallVec<[Val; 64]>, } impl Stack { @@ -294,15 +296,13 @@ impl Stack { } /// Inserts many values at the given index. - pub fn insert_many(&mut self, at: usize, values: impl IntoIterator) { + pub fn insert_many(&mut self, at: usize, values: &[Val]) { debug_assert!(at <= self.len()); - // If last, simply extend. - if at == self.inner.len() { - self.inner.extend(values); + + if at == self.len() { + self.inner.extend_from_slice(values); } else { - let mut tail = self.inner.split_off(at); - self.inner.extend(values); - self.inner.append(&mut tail); + self.inner.insert_from_slice(at, values); } } @@ -375,12 +375,12 @@ impl Stack { } /// Get a mutable reference to the inner stack representation. - pub fn inner_mut(&mut self) -> &mut Vec { + pub fn inner_mut(&mut self) -> &mut SmallVec<[Val; 64]> { &mut self.inner } /// Get a reference to the inner stack representation. - pub fn inner(&self) -> &Vec { + pub fn inner(&self) -> &SmallVec<[Val; 64]> { &self.inner } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index d52b2a49e7db..b67fd5281ed9 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -453,43 +453,107 @@ where } fn visit_f32_floor(&mut self) { - self.masm - .float_round(RoundingMode::Down, &mut self.context, OperandSize::S32); + self.masm.float_round::<_>( + RoundingMode::Down, + &mut self.env, + &mut self.context, + OperandSize::S32, + |env, cx, masm| { + let builtin = env.builtins.floor_f32::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f64_floor(&mut self) { - self.masm - .float_round(RoundingMode::Down, &mut self.context, OperandSize::S64); + self.masm.float_round( + RoundingMode::Down, + &mut self.env, + &mut self.context, + OperandSize::S64, + |env, cx, masm| { + let builtin = env.builtins.floor_f64::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f32_ceil(&mut self) { - self.masm - 
.float_round(RoundingMode::Up, &mut self.context, OperandSize::S32); + self.masm.float_round( + RoundingMode::Up, + &mut self.env, + &mut self.context, + OperandSize::S32, + |env, cx, masm| { + let builtin = env.builtins.ceil_f32::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f64_ceil(&mut self) { - self.masm - .float_round(RoundingMode::Up, &mut self.context, OperandSize::S64); + self.masm.float_round( + RoundingMode::Up, + &mut self.env, + &mut self.context, + OperandSize::S64, + |env, cx, masm| { + let builtin = env.builtins.ceil_f64::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f32_nearest(&mut self) { - self.masm - .float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S32); + self.masm.float_round( + RoundingMode::Nearest, + &mut self.env, + &mut self.context, + OperandSize::S32, + |env, cx, masm| { + let builtin = env.builtins.nearest_f32::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)) + }, + ); } fn visit_f64_nearest(&mut self) { - self.masm - .float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S64); + self.masm.float_round( + RoundingMode::Nearest, + &mut self.env, + &mut self.context, + OperandSize::S64, + |env, cx, masm| { + let builtin = env.builtins.nearest_f64::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f32_trunc(&mut self) { - self.masm - .float_round(RoundingMode::Zero, &mut self.context, OperandSize::S32); + self.masm.float_round( + RoundingMode::Zero, + &mut self.env, + &mut self.context, + OperandSize::S32, + |env, cx, masm| { + let builtin = env.builtins.trunc_f32::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f64_trunc(&mut self) { - self.masm - .float_round(RoundingMode::Zero, &mut self.context, OperandSize::S64); + self.masm.float_round( + RoundingMode::Zero, + &mut self.env, + &mut self.context, + OperandSize::S64, + |env, cx, masm| { + let builtin = 
env.builtins.trunc_f64::(); + FnCall::emit::(masm, cx, Callee::Builtin(builtin)); + }, + ); } fn visit_f32_sqrt(&mut self) { @@ -1291,8 +1355,12 @@ where } fn visit_call(&mut self, index: u32) { - let callee = self.env.callee_from_index(FuncIndex::from_u32(index)); - FnCall::emit::(self.masm, &mut self.context, |_| callee.clone()); + FnCall::emit::( + self.masm, + &mut self.context, + self.env + .callee_from_index::(FuncIndex::from_u32(index)), + ) } fn visit_call_indirect(&mut self, type_index: u32, table_index: u32, _: u8) { @@ -1321,9 +1389,11 @@ where } } - FnCall::emit::(self.masm, &mut self.context, |_| { - self.env.funcref(type_index) - }) + FnCall::emit::( + self.masm, + &mut self.context, + self.env.funcref::(type_index), + ) } fn visit_table_init(&mut self, elem: u32, table: u32) { @@ -1335,15 +1405,19 @@ where self.context.stack.insert_many( at, - [ + &[ vmctx.into(), table.try_into().unwrap(), elem.try_into().unwrap(), ], ); - FnCall::emit::(self.masm, &mut self.context, |cx| { - Callee::Builtin(cx.builtins.table_init::()) - }); + + let builtin = self.env.builtins.table_init::(); + FnCall::emit::( + self.masm, + &mut self.context, + Callee::Builtin(builtin.clone()), + ) } fn visit_table_copy(&mut self, dst: u32, src: u32) { @@ -1353,16 +1427,15 @@ where let at = self.context.stack.len() - 3; self.context.stack.insert_many( at, - [ + &[ vmctx.into(), dst.try_into().unwrap(), src.try_into().unwrap(), ], ); - FnCall::emit::(self.masm, &mut self.context, |context| { - Callee::Builtin(context.builtins.table_copy::()) - }); + let builtin = self.env.builtins.table_copy::(); + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(builtin)) } fn visit_table_get(&mut self, table: u32) { @@ -1385,10 +1458,7 @@ where let table_index = TableIndex::from_u32(table); let table_plan = self.env.table_plan(table_index); let builtin = match table_plan.table.wasm_ty.heap_type { - WasmHeapType::Func => self - .context - .builtins - .table_grow_func_ref::(), + 
WasmHeapType::Func => self.env.builtins.table_grow_func_ref::(), ty => unimplemented!("Support for HeapType: {ty}"), }; @@ -1406,11 +1476,13 @@ where self.context.stack.inner_mut().swap(len - 1, len - 2); self.context .stack - .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); + .insert_many(at, &[vmctx.into(), table.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |_| { - Callee::Builtin(builtin.clone()) - }); + FnCall::emit::( + self.masm, + &mut self.context, + Callee::Builtin(builtin.clone()), + ) } fn visit_table_size(&mut self, table: u32) { @@ -1425,10 +1497,7 @@ where let table_index = TableIndex::from_u32(table); let table_plan = self.env.table_plan(table_index); let builtin = match table_plan.table.wasm_ty.heap_type { - WasmHeapType::Func => self - .context - .builtins - .table_fill_func_ref::(), + WasmHeapType::Func => self.env.builtins.table_fill_func_ref::(), ty => unimplemented!("Support for heap type: {ty}"), }; @@ -1437,10 +1506,12 @@ where let at = len - 3; self.context .stack - .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |_| { - Callee::Builtin(builtin.clone()) - }) + .insert_many(at, &[vmctx.into(), table.try_into().unwrap()]); + FnCall::emit::( + self.masm, + &mut self.context, + Callee::Builtin(builtin.clone()), + ) } fn visit_table_set(&mut self, table: u32) { @@ -1482,14 +1553,12 @@ where fn visit_elem_drop(&mut self, index: u32) { let ptr_type = self.env.ptr_type(); - let elem_drop = self.context.builtins.elem_drop::(); + let elem_drop = self.env.builtins.elem_drop::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); self.context .stack .extend([vmctx.into(), index.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |_| { - Callee::Builtin(elem_drop.clone()) - }); + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(elem_drop)) } fn visit_memory_init(&mut self, data_index: u32, mem: u32) { @@ -1499,15 +1568,14 @@ 
where let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); self.context.stack.insert_many( at, - [ + &[ vmctx.into(), mem.try_into().unwrap(), data_index.try_into().unwrap(), ], ); - FnCall::emit::(self.masm, &mut self.context, |cx| { - Callee::Builtin(cx.builtins.memory_init::()) - }); + let builtin = self.env.builtins.memory_init::(); + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(builtin)) } fn visit_memory_copy(&mut self, dst_mem: u32, src_mem: u32) { @@ -1522,17 +1590,17 @@ where let at = self.context.stack.len() - 2; self.context .stack - .insert_many(at, [src_mem.try_into().unwrap()]); + .insert_many(at, &[src_mem.try_into().unwrap()]); // One element was inserted above, so instead of 3, we use 4. let at = self.context.stack.len() - 4; self.context .stack - .insert_many(at, [vmctx.into(), dst_mem.try_into().unwrap()]); + .insert_many(at, &[vmctx.into(), dst_mem.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |cx| { - Callee::Builtin(cx.builtins.memory_copy::()) - }); + let builtin = self.env.builtins.memory_copy::(); + + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(builtin)) } fn visit_memory_fill(&mut self, mem: u32) { @@ -1543,11 +1611,10 @@ where self.context .stack - .insert_many(at, [vmctx.into(), mem.try_into().unwrap()]); + .insert_many(at, &[vmctx.into(), mem.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |cx| { - Callee::Builtin(cx.builtins.memory_fill::()) - }); + let builtin = self.env.builtins.memory_fill::(); + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(builtin)) } fn visit_memory_size(&mut self, mem: u32, _: u8) { @@ -1563,12 +1630,11 @@ where // The stack at this point contains: [ delta ] // The desired state is // [ vmctx, delta, index ] - self.context.stack.insert_many(at, [vmctx.into()]); + self.context.stack.insert_many(at, &[vmctx.into()]); self.context.stack.extend([mem.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |cx| { 
- Callee::Builtin(cx.builtins.memory32_grow::()) - }); + let builtin = self.env.builtins.memory32_grow::(); + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(builtin)) } fn visit_data_drop(&mut self, data_index: u32) { @@ -1578,9 +1644,8 @@ where .stack .extend([vmctx.into(), data_index.try_into().unwrap()]); - FnCall::emit::(self.masm, &mut self.context, |cx| { - Callee::Builtin(cx.builtins.data_drop::()) - }); + let builtin = self.env.builtins.data_drop::(); + FnCall::emit::(self.masm, &mut self.context, Callee::Builtin(builtin)) } fn visit_nop(&mut self) {}