diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 37ff419335e6..fe2c45236e04 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -1,18 +1,29 @@ use anyhow::Result; use object::write::{Object, SymbolId}; use std::any::Any; +use std::mem; use std::sync::Mutex; use wasmparser::FuncValidatorAllocations; use wasmtime_cranelift_shared::{CompiledFunction, ModuleTextBuilder}; use wasmtime_environ::{ CompileError, DefinedFuncIndex, FilePos, FuncIndex, FunctionBodyData, FunctionLoc, - ModuleTranslation, ModuleTypes, PrimaryMap, TrapEncodingBuilder, WasmFunctionInfo, + ModuleTranslation, ModuleTypes, PrimaryMap, TrapEncodingBuilder, VMOffsets, WasmFunctionInfo, }; -use winch_codegen::{TargetIsa, TrampolineKind}; +use winch_codegen::{BuiltinFunctions, TargetIsa, TrampolineKind}; + +/// Function compilation context. +/// This struct holds information that can be shared globally across +/// all function compilations. +struct CompilationContext { + /// Validator allocations. + allocations: FuncValidatorAllocations, + /// Builtin functions available to JIT code. + builtins: BuiltinFunctions, +} pub(crate) struct Compiler { isa: Box, - allocations: Mutex>, + contexts: Mutex>, } /// The compiled function environment. @@ -30,20 +41,26 @@ impl Compiler { pub fn new(isa: Box) -> Self { Self { isa, - allocations: Mutex::new(Vec::new()), + contexts: Mutex::new(Vec::new()), } } - fn take_allocations(&self) -> FuncValidatorAllocations { - self.allocations - .lock() - .unwrap() - .pop() - .unwrap_or_else(Default::default) + /// Get a compilation context or create a new one if none available. + fn get_context(&self, translation: &ModuleTranslation) -> CompilationContext { + self.contexts.lock().unwrap().pop().unwrap_or_else(|| { + let pointer_size = self.isa.pointer_bytes(); + let vmoffsets = VMOffsets::new(pointer_size, &translation.module); + CompilationContext { + allocations: Default::default(), + builtins: BuiltinFunctions::new(&vmoffsets, self.isa.wasmtime_call_conv()), + } + }) } - fn save_allocations(&self, allocs: FuncValidatorAllocations) { - self.allocations.lock().unwrap().push(allocs) + /// Save a compilation context. 
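For illustration (not part of the patch): a minimal, self-contained sketch of the context-pooling pattern used by `get_context`/`save_context` above, with placeholder `Context`/`Pool` types standing in for `CompilationContext` and `Compiler`.

```rust
use std::sync::Mutex;

#[derive(Default)]
struct Context {
    // Stand-in for reusable state such as validator allocations or the
    // builtin-function cache.
    scratch: Vec<u8>,
}

struct Pool {
    contexts: Mutex<Vec<Context>>,
}

impl Pool {
    /// Reuse a previously saved context if one is available, otherwise build a new one.
    fn get(&self) -> Context {
        self.contexts.lock().unwrap().pop().unwrap_or_default()
    }

    /// Return the context so the next compilation can reuse its allocations.
    fn save(&self, ctx: Context) {
        self.contexts.lock().unwrap().push(ctx);
    }
}

fn main() {
    let pool = Pool {
        contexts: Mutex::new(Vec::new()),
    };
    let mut ctx = pool.get();
    // ... compile one function, reusing `ctx.scratch` across compilations ...
    ctx.scratch.clear();
    pool.save(ctx);
}
```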
+ fn save_context(&self, mut context: CompilationContext, allocs: FuncValidatorAllocations) { + context.allocations = allocs; + self.contexts.lock().unwrap().push(context); } } @@ -65,12 +82,20 @@ impl wasmtime_environ::Compiler for Compiler { .try_into() .unwrap(), ); - let mut validator = validator.into_validator(self.take_allocations()); + let mut context = self.get_context(translation); + let mut validator = validator.into_validator(mem::take(&mut context.allocations)); let buffer = self .isa - .compile_function(ty, types, &body, &translation, &mut validator) + .compile_function( + ty, + &body, + translation, + types, + &mut context.builtins, + &mut validator, + ) .map_err(|e| CompileError::Codegen(format!("{e:?}"))); - self.save_allocations(validator.into_allocations()); + self.save_context(context, validator.into_allocations()); let buffer = buffer?; let compiled_function = CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index a92bf6bebe47..d5ec3ec84924 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -46,6 +46,7 @@ use crate::isa::{reg::Reg, CallingConvention}; use crate::masm::OperandSize; use smallvec::SmallVec; +use std::collections::HashSet; use std::ops::{Add, BitAnd, Not, Sub}; use wasmtime_environ::{WasmFuncType, WasmHeapType, WasmType}; @@ -237,29 +238,30 @@ pub(crate) struct ABISig { pub result: ABIResult, /// Stack space needed for stack arguments. pub stack_bytes: u32, + /// All the registers used in the [`ABISig`]. + /// Note that this collection is guaranteed to + /// be unique: in some cases some registers might + /// be used as params as a well as returns (e.g. xmm0 in x64). + pub regs: HashSet, } impl ABISig { /// Create a new ABI signature. pub fn new(params: ABIParams, result: ABIResult, stack_bytes: u32) -> Self { + let regs = params + .iter() + .filter_map(|r| r.get_reg()) + .collect::>(); + let result_regs = result.regs(); + let chained = regs.into_iter().chain(result_regs); + Self { params, result, stack_bytes, + regs: HashSet::from_iter(chained), } } - - /// Returns an iterator over all the registers used as params. - pub fn param_regs(&self) -> impl Iterator + '_ { - self.params.iter().filter_map(|r| r.get_reg()) - } - - /// Returns an iterator over all the registers used in the signature. - pub fn regs(&self) -> impl Iterator + '_ { - let params_iter = self.param_regs(); - let result_iter = self.result.regs(); - params_iter.chain(result_iter) - } } /// Returns the size in bytes of a given WebAssembly type. diff --git a/winch/codegen/src/codegen/builtin.rs b/winch/codegen/src/codegen/builtin.rs index 9a5fe34a34d3..9824a9dbd224 100644 --- a/winch/codegen/src/codegen/builtin.rs +++ b/winch/codegen/src/codegen/builtin.rs @@ -5,16 +5,57 @@ use crate::{ codegen::env::ptr_type_from_ptr_size, CallingConvention, }; -use wasmtime_environ::{BuiltinFunctionIndex, PtrSize, WasmType}; +use cranelift_codegen::ir::LibCall; +use std::sync::Arc; +use wasmtime_environ::{BuiltinFunctionIndex, PtrSize, VMOffsets, WasmType}; + +#[derive(Copy, Clone)] +pub(crate) enum BuiltinType { + /// Dynamic built-in function, derived from the VMContext. + Dynamic { + /// The offset of the built-in function. + offset: u32, + /// The built-in function base, relative to the VMContext. + base: u32, + }, + /// A known libcall. + /// See [`cranelift_codegen::ir::LibCall`] for more details. 
+ Known(LibCall), +} + +impl BuiltinType { + /// Create a new dynamic built-in function type. + pub fn dynamic(offset: u32, base: u32) -> Self { + Self::Dynamic { offset, base } + } + + /// Create a new known built-in function type. + pub fn known(libcall: LibCall) -> Self { + Self::Known(libcall) + } +} + +#[derive(Clone)] +pub struct BuiltinFunction { + inner: Arc, +} + +impl BuiltinFunction { + pub(crate) fn sig(&self) -> &ABISig { + &self.inner.sig + } + + pub(crate) fn ty(&self) -> BuiltinType { + self.inner.ty + } +} /// Metadata about a builtin function. -pub(crate) struct BuiltinFunction { +pub struct BuiltinFunctionInner { /// The ABI specific signature of the function. - pub sig: ABISig, - /// The offset of the builtin function - pub offset: u32, - /// The builtin function base, relative to the VMContext. - pub base: u32, + sig: ABISig, + /// The built-in function type. + ty: BuiltinType, } macro_rules! declare_function_sig { @@ -35,6 +76,22 @@ macro_rules! declare_function_sig { ptr_type: WasmType, /// The builtin functions base relative to the VMContext. base: u32, + /// F32 Ceil. + ceil_f32: Option, + /// F64 Ceil. + ceil_f64: Option, + /// F32 Floor. + floor_f32: Option, + /// F64 Floor. + floor_f64: Option, + /// F32 Trunc. + trunc_f32: Option, + /// F64 Trunc. + trunc_f64: Option, + /// F32 Nearest. + nearest_f32: Option, + /// F64 Nearest. + nearest_f64: Option, $( $name: Option, )* @@ -43,13 +100,21 @@ macro_rules! declare_function_sig { // Until all the builtin functions are used. #[allow(dead_code)] impl BuiltinFunctions { - pub fn new(ptr: impl PtrSize, call_conv: CallingConvention, base: u32) -> Self { - let size = ptr.size(); + pub fn new(vmoffsets: &VMOffsets
<P>
, call_conv: CallingConvention) -> Self { + let size = vmoffsets.ptr.size(); Self { ptr_size: size, call_conv, - base, + base: vmoffsets.vmctx_builtin_functions(), ptr_type: ptr_type_from_ptr_size(size), + ceil_f32: None, + ceil_f64: None, + floor_f32: None, + floor_f64: None, + trunc_f32: None, + trunc_f64: None, + nearest_f32: None, + nearest_f64: None, $( $name: None, )* @@ -68,6 +133,14 @@ macro_rules! declare_function_sig { WasmType::I32 } + fn f32(&self) -> WasmType { + WasmType::F32 + } + + fn f64(&self) -> WasmType { + WasmType::F64 + } + fn i64(&self) -> WasmType { WasmType::I64 } @@ -76,21 +149,116 @@ macro_rules! declare_function_sig { self.pointer() } + fn over_f64(&self) -> ABISig { + A::sig_from(&[self.f64()], &[self.f64()], &self.call_conv) + } + + fn over_f32(&self) -> ABISig { + A::sig_from(&[self.f64()], &[self.f64()], &self.call_conv) + } + + pub(crate) fn ceil_f32(&mut self) -> BuiltinFunction { + if self.ceil_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::CeilF32) }); + self.ceil_f32 = Some(BuiltinFunction { + inner, + }); + } + self.ceil_f32.as_ref().unwrap().clone() + } + + pub(crate) fn ceil_f64(&mut self) -> BuiltinFunction { + if self.ceil_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::CeilF64) }); + self.ceil_f64 = Some(BuiltinFunction { + inner, + }); + } + self.ceil_f64.as_ref().unwrap().clone() + } + + pub(crate) fn floor_f32(&mut self) -> BuiltinFunction { + if self.floor_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::FloorF32) }); + self.floor_f32 = Some(BuiltinFunction { + inner, + }); + } + self.floor_f32.as_ref().unwrap().clone() + } + + pub(crate) fn floor_f64(&mut self) -> BuiltinFunction { + if self.floor_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::FloorF64) }); + self.floor_f64 = Some(BuiltinFunction { + inner, + }); + } + self.floor_f64.as_ref().unwrap().clone() + } + + pub(crate) fn trunc_f32(&mut self) -> BuiltinFunction { + if self.trunc_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::TruncF32) }); + self.trunc_f32 = Some(BuiltinFunction { + inner, + }); + } + self.trunc_f32.as_ref().unwrap().clone() + } + + pub(crate) fn trunc_f64(&mut self) -> BuiltinFunction { + if self.trunc_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::TruncF64) }); + self.trunc_f64 = Some(BuiltinFunction { + inner, + }); + } + self.trunc_f64.as_ref().unwrap().clone() + } + + pub(crate) fn nearest_f32(&mut self) -> BuiltinFunction { + if self.nearest_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::NearestF32) }); + self.nearest_f32 = Some(BuiltinFunction { + inner, + }); + } + self.nearest_f32.as_ref().unwrap().clone() + } + + pub(crate) fn nearest_f64(&mut self) -> BuiltinFunction { + if self.nearest_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::NearestF64) }); + self.nearest_f64 = Some(BuiltinFunction { + inner, + }); + } + self.nearest_f64.as_ref().unwrap().clone() + } + $( - pub(crate) fn $name(&mut self) -> 
&BuiltinFunction { + pub(crate) fn $name(&mut self) -> BuiltinFunction { if self.$name.is_none() { let params = vec![ $(self.$param() ),* ]; let result = vec![ $(self.$result() )?]; let sig = A::sig_from(¶ms, &result, &self.call_conv); let index = BuiltinFunctionIndex::$name(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::dynamic(index.index() * (self.ptr_size as u32), self.base) }); self.$name = Some(BuiltinFunction { - sig, - offset: index.index() * (self.ptr_size as u32), - base: self.base, + inner, }); } - self.$name.as_ref().unwrap() + self.$name.as_ref().unwrap().clone() } )* } diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index 3f9bbba93c4d..d612c3391b8f 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -1,330 +1,277 @@ //! Function call emission. For more details around the ABI and //! calling convention, see [ABI]. +//! +//! This module exposes a single function [`FnCall::emit`], which is responsible +//! of orchestrating the emission of calls. In general such orchestration +//! takes place in 4 steps: +//! +//! 1. [`Callee`] resolution. +//! 2. Mapping of the [`Callee`] to the [`CalleeKind`]. +//! 3. Calculation of the stack space consumed by the call. +//! 4. Emission. +//! +//! The stack space consumed by the function call; that is, +//! the sum of: +//! +//! 1. The amount of stack space created by saving any live +//! registers at the callsite. +//! 2. The amount of space used by any memory entries in the value +//! stack present at the callsite, that will be used as +//! arguments for the function call. Any memory values in the +//! value stack that are needed as part of the function +//! arguments, will be consumed by the function call (either by +//! assigning those values to a register or by storing those +//! values to a memory location if the callee argument is on +//! the stack), so we track that stack space to reclaim it once +//! the function call has ended. This could also be done in +//! when assigning arguments everytime a memory entry needs to be assigned +//! to a particular location, but doing so, will incur in more +//! instructions (e.g. a pop per argument that needs to be +//! assigned); it's more efficient to track the space needed by +//! those memory values and reclaim it at once. +//! +//! The machine stack throghout the function call is as follows: +//! ┌──────────────────────────────────────────────────┐ +//! │ │ +//! │ 1 │ +//! │ Stack space created by any previous spills │ +//! │ from the value stack; and which memory values │ +//! │ are used as function arguments. │ +//! │ │ +//! ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like: +//! │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ] +//! │ 2 │ +//! │ Stack space created by saving │ +//! │ any live registers at the callsite. │ +//! │ │ +//! │ │ +//! ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like: +//! │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ] +//! │ │ Assuming that the callee takes 4 arguments, we calculate +//! │ │ 2 spilled registers + 2 memory values; all of which will be used +//! │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is +//! │ the callee function arguments in the stack; │ is considered to be consumed by the call. +//! │ represented by `arg_stack_space` │ +//! │ │ +//! │ │ +//! 
│ │ +//! └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call + use crate::{ - abi::{ABIArg, ABIResult, ABISig, ABI}, - codegen::{BuiltinFunction, CodeGenContext}, + abi::{ABIArg, ABISig, ABI}, + codegen::{ + ptr_type_from_ptr_size, BuiltinFunction, BuiltinType, Callee, CalleeInfo, CodeGenContext, + TypedReg, + }, masm::{CalleeKind, MacroAssembler, OperandSize}, reg::Reg, + CallingConvention, }; -use wasmtime_environ::FuncIndex; +use smallvec::SmallVec; +use wasmtime_environ::{PtrSize, VMOffsets, WasmType}; /// All the information needed to emit a function call. #[derive(Copy, Clone)] -pub(crate) struct FnCall<'a> { - /// The stack space consumed by the function call; that is, - /// the sum of: - /// - /// 1. The amount of stack space created by saving any live - /// registers at the callsite. - /// 2. The amount of space used by any memory entries in the value - /// stack present at the callsite, that will be used as - /// arguments for the function call. Any memory values in the - /// value stack that are needed as part of the function - /// arguments, will be consumed by the function call (either by - /// assigning those values to a register or by storing those - /// values to a memory location if the callee argument is on - /// the stack), so we track that stack space to reclaim it once - /// the function call has ended. This could also be done in - /// `assign_args` everytime a memory entry needs to be assigned - /// to a particular location, but doing so, will incur in more - /// instructions (e.g. a pop per argument that needs to be - /// assigned); it's more efficient to track the space needed by - /// those memory values and reclaim it at once. - /// - /// The machine stack throghout the function call is as follows: - /// ┌──────────────────────────────────────────────────┐ - /// │ │ - /// │ 1 │ - /// │ Stack space created by any previous spills │ - /// │ from the value stack; and which memory values │ - /// │ are used as function arguments. │ - /// │ │ - /// ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like: - /// │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ] - /// │ 2 │ - /// │ Stack space created by saving │ - /// │ any live registers at the callsite. │ - /// │ │ - /// │ │ - /// ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like: - /// │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ] - /// │ │ Assuming that the callee takes 4 arguments, we calculate - /// │ │ 2 spilled registers + 2 memory values; all of which will be used - /// │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is - /// │ the callee function arguments in the stack; │ is considered to be consumed by the call. - /// │ represented by `arg_stack_space` │ - /// │ │ - /// │ │ - /// │ │ - /// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call - /// - call_stack_space: Option, - /// The total stack space needed for the callee arguments on the - /// stack, including any adjustments to the function's frame and - /// aligned to to the required ABI alignment. - arg_stack_space: u32, - /// The ABI-specific signature of the callee. - pub abi_sig: &'a ABISig, - /// Whether this a built-in function call. - lib: bool, +pub(crate) struct FnCall {} + +/// Internal wrapping of a function signature. 
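For illustration (not part of the patch): a minimal sketch of the four-step shape described in the module docs above, using simplified stand-in types rather than the real winch `Callee`/`CalleeKind`.

```rust
#[derive(Debug)]
enum Callee {
    Local(u32),
    Builtin(&'static str),
}

#[derive(Debug)]
enum CalleeKind {
    Direct(u32),
    Known(&'static str),
}

fn emit(mut resolve: impl FnMut() -> Callee) {
    // 1. Resolve the callee.
    let callee = resolve();
    // 2. Map the callee to a concrete callee kind.
    let kind = match callee {
        Callee::Local(index) => CalleeKind::Direct(index),
        Callee::Builtin(libcall) => CalleeKind::Known(libcall),
    };
    // 3. Calculate the stack space consumed by the call
    //    (spilled live registers + memory arguments).
    let call_stack_space: u32 = 0; // placeholder
    // 4. Emit the call, then reclaim the space computed in step 3.
    println!("call {kind:?}; reclaim {call_stack_space} bytes");
}

fn main() {
    emit(|| Callee::Local(3));
    emit(|| Callee::Builtin("CeilF32"));
}
```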
+enum Sig<'a> { + Owned(ABISig), + Borrowed(&'a ABISig), } -impl<'a> FnCall<'a> { - /// Creates a new [`FnCall`] from the callee's [`ABISig`]. - pub fn new(callee_sig: &'a ABISig) -> Self { - Self { - abi_sig: &callee_sig, - arg_stack_space: callee_sig.stack_bytes, - call_stack_space: None, - lib: false, +impl<'a> Sig<'a> { + /// Get a reference to the underling signature. + pub fn as_ref(&self) -> &ABISig { + match self { + Self::Owned(ref s) => s, + Self::Borrowed(b) => b, } } +} - /// Saves any live registers and records the stack space that will be - /// consumed by the function call. The stack space consumed by the call must - /// be known before emitting the call via any of the emission variants: - /// [`FnCall::direct`], [`FnCall::indirect`] or [`FnCall::addr`], which - /// means that the call stack space must be calculated either by invoking - /// [`FnCall::save_live_registers`] or - /// [`FnCall::calculate_call_stack_space`] before invoking any of - /// the emission variants. - pub fn save_live_registers( - &mut self, - context: &mut CodeGenContext, +impl FnCall { + /// Orchestrates the emission of a function call: + /// 1. Resolves the [`Callee`] through the given callback. + /// 2. Maps the resolved [`Callee`] to the [`CalleeKind`]. + /// 3. Saves any live registers and calculates the stack space consumed + /// by the function call. + /// 4. Emits the call. + pub fn emit( masm: &mut M, - ) -> &mut Self { - // Invariant: ensure that `call_stack_space` is only set once: either by - // [`FnCall::save_live_registers`] or - // [`FnCall::calculate_call_stack_space`] - debug_assert!(self.call_stack_space.is_none()); - let callee_params = &self.abi_sig.params; - let stack = &context.stack; - let call_stack_space = match callee_params.len() { - 0 => { - let _ = context.save_live_registers_and_calculate_sizeof(masm, ..); - 0u32 - } - _ => { - // Here we perform a "spill" of the register entries - // in the Wasm value stack, we also count any memory - // values that will be used used as part of the callee - // arguments. Saving the live registers is done by - // emitting push operations for every `Reg` entry in - // the Wasm value stack. We do this to be compliant - // with Winch's internal ABI, in which all registers - // are treated as caller-saved. For more details, see - // [ABI]. - // - // The next few lines, partition the value stack into - // two sections: - // +------------------+--+--- (Stack top) - // | | | - // | | | 1. The top `n` elements, which are used for - // | | | function arguments; for which we save any - // | | | live registers, keeping track of the amount of registers - // +------------------+ | saved plus the amount of memory values consumed by the function call; - // | | | with this information we can later reclaim the space used by the function call. - // | | | - // +------------------+--+--- - // | | | 2. The rest of the items in the stack, for which - // | | | we only save any live registers. - // | | | - // +------------------+ | - assert!(stack.len() >= callee_params.len()); - let partition = stack.len() - callee_params.len(); - let _ = context.save_live_registers_and_calculate_sizeof(masm, 0..partition); - context.save_live_registers_and_calculate_sizeof(masm, partition..) 
- } - }; + context: &mut CodeGenContext, + mut resolve: R, + ) where + R: FnMut(&mut CodeGenContext) -> Callee, + { + let callee = resolve(context); + let ptr_type = ptr_type_from_ptr_size(context.vmoffsets.ptr.size()); + let sig = Self::get_sig::(&callee, ptr_type); + let sig = sig.as_ref(); - self.call_stack_space = Some(call_stack_space); - self - } + let arg_stack_space = sig.stack_bytes; + let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm); + let call_stack_space = Self::save(context, masm, &sig); - /// Records the stack space that will be needeed by the function call by - /// scanning the value stack and returning the size of the all the memory - /// entries present in callee's argument length range. The stack space - /// consumed by the call must be known before emitting the call via any of - /// the emission variants: [`FnCall::direct`], [`FnCall::indirect`] or - /// [`FnCall::addr`], which means that the call stack space must be - /// calculated either by invoking [`FnCall::save_live_registers`] or - /// [`FnCall::calculate_call_stack_space`] before invoking any of - /// the emission variants. - /// This function is particularly useful when there's no need to save any - /// live registers before emitting the function call. This could happen when - /// emitting calls to libcalls: [`FnCall::with_lib`] will eagerly save all - /// the live registers when invoked and will also ensure that any registers - /// allocated after are non argument registers, in which case if any of - /// those registers need to go on the value stack to be used as function - /// arguments, they don't need to be saved. - pub fn calculate_call_stack_space(&mut self, context: &mut CodeGenContext) -> &mut Self { - // Invariant: ensure that `call_stack_space` is only set once: either by - // [`FnCall::save_live_registers`] or - // [`FnCall::calculate_call_stack_space`] - debug_assert!(self.call_stack_space.is_none()); - let params_len = self.abi_sig.params.len(); - assert!(context.stack.len() >= params_len); + let reserved_stack = masm.call(arg_stack_space, |masm| { + let scratch = ::scratch_reg(); + Self::assign(sig, context, masm, scratch); + kind + }); - let stack_len = context.stack.len(); - let call_stack_space = if params_len == 0 { - 0 - } else { - context.stack.sizeof((stack_len - params_len)..) - }; - self.call_stack_space = Some(call_stack_space); - self + match kind { + CalleeKind::Indirect(r) => context.free_reg(r), + _ => {} + } + Self::cleanup( + sig, + call_stack_space.checked_add(reserved_stack).unwrap(), + masm, + context, + ); } - /// Emit a direct function call, to a locally defined function. - pub fn direct( - self, - masm: &mut M, - context: &mut CodeGenContext, - callee: FuncIndex, - ) { - // Invariant: `call_stack_space` must be known. - debug_assert!(self.call_stack_space.is_some()); - let reserved_stack = masm.call(self.arg_stack_space, |masm| { - self.assign_args(context, masm, ::scratch_reg()); - CalleeKind::direct(callee.as_u32()) - }); - self.post_call::(masm, context, reserved_stack); + /// Derive the [`ABISig`] for a particulare [`Callee]. 
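The `Sig` wrapper above is essentially an ad-hoc `Cow`: builtin callees already carry a long-lived `ABISig` that can be borrowed, while other callees need a signature computed at the call site. A stand-alone sketch of that pattern, with a placeholder `Signature` type:

```rust
struct Signature {
    stack_bytes: u32,
}

enum Sig<'a> {
    Owned(Signature),
    Borrowed(&'a Signature),
}

impl<'a> Sig<'a> {
    /// Get a reference to the underlying signature, regardless of ownership.
    fn as_ref(&self) -> &Signature {
        match self {
            Sig::Owned(s) => s,
            Sig::Borrowed(s) => *s,
        }
    }
}

fn main() {
    let cached = Signature { stack_bytes: 0 };
    // A builtin's signature is computed once and borrowed at every call site...
    let builtin = Sig::Borrowed(&cached);
    // ...while other callees get a signature computed on the spot.
    let local = Sig::Owned(Signature { stack_bytes: 16 });
    println!("{} {}", builtin.as_ref().stack_bytes, local.as_ref().stack_bytes);
}
```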
+ fn get_sig(callee: &Callee, ptr_type: WasmType) -> Sig { + match callee { + Callee::Builtin(info) => Sig::Borrowed(info.sig()), + Callee::Import(info) => { + let mut params: SmallVec<[WasmType; 6]> = + SmallVec::with_capacity(info.ty.params().len() + 2); + params.extend_from_slice(&[ptr_type, ptr_type]); + params.extend_from_slice(info.ty.params()); + Sig::Owned(::sig_from( + ¶ms, + info.ty.returns(), + &CallingConvention::Default, + )) + } + Callee::Local(info) => { + Sig::Owned(::sig(&info.ty, &CallingConvention::Default)) + } + Callee::FuncRef(ty) => { + Sig::Owned(::sig(&ty, &CallingConvention::Default)) + } + } } - /// Emit an indirect function call, using a register. - pub fn reg(self, masm: &mut M, context: &mut CodeGenContext, reg: Reg) { - // Invariant: `call_stack_space` must be known. - debug_assert!(self.call_stack_space.is_some()); - let reserved_stack = masm.call(self.arg_stack_space, |masm| { - let scratch = ::scratch_reg(); - self.assign_args(context, masm, scratch); - CalleeKind::indirect(reg) - }); - context.free_reg(reg); - self.post_call::(masm, context, reserved_stack); + /// Maps the given [`Callee`] to a [`CalleeKind`]. + fn map( + vmoffsets: &VMOffsets
<P>
, + callee: &Callee, + sig: &ABISig, + context: &mut CodeGenContext, + masm: &mut M, + ) -> CalleeKind { + match callee { + Callee::Builtin(b) => Self::load_builtin(b, context, masm), + Callee::FuncRef(_) => Self::load_funcref(sig, vmoffsets.ptr.size(), context, masm), + Callee::Local(i) => Self::map_local(i), + Callee::Import(i) => Self::load_import(i, sig, context, masm, vmoffsets), + } } - /// Emit an indirect function call, using a an address. - /// This function will load the provided address into a unallocatable - /// scratch register. - pub fn addr( - self, - masm: &mut M, + /// Load a built-in function to the next available register. + fn load_builtin( + builtin: &BuiltinFunction, context: &mut CodeGenContext, - callee: M::Address, - ) { - // Invariant: `call_stack_space` must be known. - debug_assert!(self.call_stack_space.is_some()); - let reserved_stack = masm.call(self.arg_stack_space, |masm| { - let scratch = ::scratch_reg(); - self.assign_args(context, masm, scratch); - masm.load(callee, scratch, OperandSize::S64); - CalleeKind::indirect(scratch) - }); + masm: &mut M, + ) -> CalleeKind { + match builtin.ty() { + BuiltinType::Dynamic { offset, base } => { + let sig = builtin.sig(); + let callee = context.without::(&sig.regs, masm, |cx, masm| { + let scratch = ::scratch_reg(); + let builtins_base = masm.address_at_vmctx(base); + masm.load_ptr(builtins_base, scratch); + let addr = masm.address_at_reg(scratch, offset); + let callee = cx.any_gpr(masm); + masm.load_ptr(addr, callee); + callee + }); + CalleeKind::indirect(callee) + } + BuiltinType::Known(c) => CalleeKind::known(c), + } + } - self.post_call::(masm, context, reserved_stack); + /// Map a local function to a [`CalleeKind`]. + fn map_local(info: &CalleeInfo) -> CalleeKind { + CalleeKind::direct(info.index.as_u32()) } - /// Prepares the compiler to call a built-in function (libcall). - /// This fuction, saves all the live registers and loads the callee - /// address into a non-argument register which is then passed to the - /// caller through the provided callback. - /// - /// It is the caller's responsibility to finalize the function call - /// by calling `FnCall::reg` once all the information is known. - pub fn with_lib( - &mut self, - masm: &mut M, + /// Loads a function import to the next available register. + fn load_import( + info: &CalleeInfo, + sig: &ABISig, context: &mut CodeGenContext, - func: &BuiltinFunction, - mut f: F, - ) where - F: FnMut(&mut CodeGenContext, &mut M, &mut Self, Reg), - { - self.lib = true; - // When dealing with libcalls, we don't have all the information - // upfront (all necessary arguments in the stack) in order to optimize - // saving the live registers, so we save all the values available in - // the value stack. - context.spill(masm); - let vmctx = ::vmctx_reg(); - let scratch = ::scratch_reg(); + masm: &mut M, + vmoffsets: &VMOffsets
<P>
, + ) -> CalleeKind { + let ptr_type = ptr_type_from_ptr_size(vmoffsets.ptr.size()); + let caller_vmctx = ::vmctx_reg(); + let (callee, callee_vmctx) = + context.without::<(Reg, Reg), M, _>(&sig.regs, masm, |context, masm| { + (context.any_gpr(masm), context.any_gpr(masm)) + }); + let callee_vmctx_offset = vmoffsets.vmctx_vmfunction_import_vmctx(info.index); + let callee_vmctx_addr = masm.address_at_vmctx(callee_vmctx_offset); + masm.load_ptr(callee_vmctx_addr, callee_vmctx); - let builtins_base = masm.address_at_reg(vmctx, func.base); - masm.load(builtins_base, scratch, OperandSize::S64); - let builtin_func_addr = masm.address_at_reg(scratch, func.offset); - context.without::<(), M, _>( - // Do not free the result registers if any as the function call will - // push them onto the stack as a result of the call. - self.abi_sig.regs(), - self.abi_sig.param_regs(), - masm, - |cx, masm| { - let callee = cx.any_gpr(masm); - masm.load_ptr(builtin_func_addr, callee); - f(cx, masm, self, callee); - cx.free_reg(callee); - }, - ); + let callee_body_offset = vmoffsets.vmctx_vmfunction_import_wasm_call(info.index); + let callee_addr = masm.address_at_vmctx(callee_body_offset); + masm.load_ptr(callee_addr, callee); + + // Put the callee / caller vmctx at the start of the + // range of the stack so that they are used as first + // and second arguments. + let stack = &mut context.stack; + let location = stack.len() - (sig.params.len() - 2); + let values = [ + TypedReg::new(ptr_type, callee_vmctx).into(), + TypedReg::new(ptr_type, caller_vmctx).into(), + ] + .into_iter(); + context.stack.insert_many(location, values); + + CalleeKind::indirect(callee) } - fn post_call(&self, masm: &mut M, context: &mut CodeGenContext, size: u32) { - masm.free_stack(self.call_stack_space.unwrap() + size); - // Only account for registers given that any memory entries - // consumed by the call (assigned to a register or to a stack - // slot) were freed by the previous call to - // `masm.free_stack`, so we only care about dropping them - // here. - // - // NOTE / TODO there's probably a path to getting rid of - // `save_live_registers_and_calculate_sizeof` and - // `call_stack_space`, making it a bit more obvious what's - // happening here. We could: - // - // * Modify the `spill` implementation so that it takes a - // filtering callback, to control which values the caller is - // interested in saving (e.g. save all if no function is provided) - // * Rely on the new implementation of `drop_last` to calcuate - // the stack memory entries consumed by the call and then free - // the calculated stack space. - context.drop_last(self.abi_sig.params.len(), |regalloc, v| { - if v.is_reg() { - regalloc.free(v.get_reg().into()); - } + /// Loads a function reference to the next available register. + fn load_funcref( + sig: &ABISig, + ptr: impl PtrSize, + context: &mut CodeGenContext, + masm: &mut M, + ) -> CalleeKind { + // Pop the funcref pointer to a register and allocate a register to hold the + // address of the funcref. Since the callee is not addressed from a global non + // allocatable register (like the vmctx in the case of an import), we load the + // funcref to a register ensuring that it doesn't get assigned to a non-arg + // register. 
+ let (funcref_ptr, funcref) = context.without::<_, M, _>(&sig.regs, masm, |cx, masm| { + (cx.pop_to_reg(masm, None).into(), cx.any_gpr(masm)) }); - // When emitting built-calls we ensure that none of the registers - // (params and results) used as part of the ABI signature are - // allocatable throughout the lifetime of the `with_lib` callback, since - // such registers will be used to assign arguments and hold results. - // After executing the callback, it's only safe to free the param - // registers, since depending on the signature, the caller - // will push any result registers to the stack, keeping those registers allocated. - // Here we ensure that any allocated result registers are correctly - // freed before finalizing the function call and pushing any results to - // the value stack. - if self.lib { - match self.abi_sig.result { - ABIResult::Reg { reg, .. } => { - assert!(!context.regalloc.reg_available(reg)); - context.free_reg(reg); - } - _ => {} - } - } - context.push_abi_results(&self.abi_sig.result, masm); + masm.load_ptr( + masm.address_at_reg(funcref_ptr, ptr.vm_func_ref_wasm_call().into()), + funcref, + ); + context.free_reg(funcref_ptr); + CalleeKind::indirect(funcref) } - fn assign_args( - &self, + /// Assign arguments for the function call. + fn assign( + sig: &ABISig, context: &mut CodeGenContext, masm: &mut M, scratch: Reg, ) { - let arg_count = self.abi_sig.params.len(); + let arg_count = sig.params.len(); let stack = &context.stack; let mut stack_values = stack.peekn(arg_count); - for arg in &self.abi_sig.params { + for arg in &sig.params { let val = stack_values .next() .unwrap_or_else(|| panic!("expected stack value for function argument")); @@ -341,4 +288,81 @@ impl<'a> FnCall<'a> { } } } + + /// Save any live registers prior to emitting the call. + // + // Here we perform a "spill" of the register entries + // in the Wasm value stack, we also count any memory + // values that will be used used as part of the callee + // arguments. Saving the live registers is done by + // emitting push operations for every `Reg` entry in + // the Wasm value stack. We do this to be compliant + // with Winch's internal ABI, in which all registers + // are treated as caller-saved. For more details, see + // [ABI]. + // + // The next few lines, partition the value stack into + // two sections: + // +------------------+--+--- (Stack top) + // | | | + // | | | 1. The top `n` elements, which are used for + // | | | function arguments; for which we save any + // | | | live registers, keeping track of the amount of registers + // +------------------+ | saved plus the amount of memory values consumed by the function call; + // | | | with this information we can later reclaim the space used by the function call. + // | | | + // +------------------+--+--- + // | | | 2. The rest of the items in the stack, for which + // | | | we only save any live registers. + // | | | + // +------------------+ | + fn save(context: &mut CodeGenContext, masm: &mut M, sig: &ABISig) -> u32 { + let callee_params = &sig.params; + let stack = &context.stack; + match callee_params.len() { + 0 => { + let _ = context.save_live_registers_and_calculate_sizeof(masm, ..); + 0u32 + } + _ => { + assert!(stack.len() >= callee_params.len()); + let partition = stack.len() - callee_params.len(); + let _ = context.save_live_registers_and_calculate_sizeof(masm, 0..partition); + context.save_live_registers_and_calculate_sizeof(masm, partition..) + } + } + } + + /// Cleanup stack space and free registers after emitting the call. 
+ fn cleanup( + sig: &ABISig, + total_space: u32, + masm: &mut M, + context: &mut CodeGenContext, + ) { + masm.free_stack(total_space); + // Only account for registers given that any memory entries + // consumed by the call (assigned to a register or to a stack + // slot) were freed by the previous call to + // `masm.free_stack`, so we only care about dropping them + // here. + // + // NOTE / TODO there's probably a path to getting rid of + // `save_live_registers_and_calculate_sizeof` and + // `call_stack_space`, making it a bit more obvious what's + // happening here. We could: + // + // * Modify the `spill` implementation so that it takes a + // filtering callback, to control which values the caller is + // interested in saving (e.g. save all if no function is provided) + // * Rely on the new implementation of `drop_last` to calcuate + // the stack memory entries consumed by the call and then free + // the calculated stack space. + context.drop_last(sig.params.len(), |regalloc, v| { + if v.is_reg() { + regalloc.free(v.get_reg().into()); + } + }); + context.push_abi_results(&sig.result, masm); + } } diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index d2821aa3ec54..aeeb8cb42ff6 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -1,8 +1,9 @@ -use wasmtime_environ::{WasmHeapType, WasmType}; +use wasmtime_environ::{VMOffsets, WasmHeapType, WasmType}; use super::ControlStackFrame; use crate::{ abi::{ABIResult, ABI}, + codegen::BuiltinFunctions, frame::Frame, isa::reg::RegClass, masm::{MacroAssembler, OperandSize, RegImm}, @@ -27,25 +28,37 @@ use std::ops::RangeBounds; /// generation process. The code generation context should /// be generally used as the single entry point to access /// the compound functionality provided by its elements. -pub(crate) struct CodeGenContext<'a> { +pub(crate) struct CodeGenContext<'a, 'b: 'a> { /// The register allocator. pub regalloc: RegAlloc, /// The value stack. pub stack: Stack, /// The current function's frame. - pub frame: &'a Frame, + pub frame: Frame, /// Reachability state. pub reachable: bool, + /// The built-in functions available to the JIT code. + pub builtins: &'b mut BuiltinFunctions, + /// A reference to the VMOffsets. + pub vmoffsets: &'a VMOffsets, } -impl<'a> CodeGenContext<'a> { +impl<'a, 'b> CodeGenContext<'a, 'b> { /// Create a new code generation context. - pub fn new(regalloc: RegAlloc, stack: Stack, frame: &'a Frame) -> Self { + pub fn new( + regalloc: RegAlloc, + stack: Stack, + frame: Frame, + builtins: &'b mut BuiltinFunctions, + vmoffsets: &'a VMOffsets, + ) -> Self { Self { regalloc, stack, frame, reachable: true, + builtins, + vmoffsets, } } @@ -90,10 +103,9 @@ impl<'a> CodeGenContext<'a> { /// execution. Only the registers in the `free` iterator will be freed. The /// caller must guarantee that in case the iterators are different, the free /// iterator must be a subset of the alloc iterator. 
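For illustration (not part of the patch): the reworked `without` helper boils down to reserving a set of registers, running a closure, and releasing them again. A self-contained sketch of that pattern with a toy register allocator (the winch `RegAlloc` is not shown here):

```rust
use std::collections::HashSet;

#[derive(Default)]
struct Allocator {
    taken: HashSet<u8>,
}

impl Allocator {
    fn reserve(&mut self, r: u8) {
        self.taken.insert(r);
    }

    fn free(&mut self, r: u8) {
        self.taken.remove(&r);
    }

    /// Reserve `regs` for the duration of `f`, then release them again.
    fn without<T>(&mut self, regs: &[u8], mut f: impl FnMut(&mut Self) -> T) -> T {
        for r in regs {
            self.reserve(*r);
        }
        let result = f(self);
        for r in regs {
            self.free(*r);
        }
        result
    }
}

fn main() {
    let mut alloc = Allocator::default();
    // Any register picked inside the closure cannot clash with the reserved
    // ABI registers (0 and 1 here).
    let picked = alloc.without(&[0, 1], |a| {
        (0u8..16).find(|r| !a.taken.contains(r)).unwrap()
    });
    println!("picked r{picked}");
}
```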
- pub fn without( + pub fn without<'r, T, M, F>( &mut self, - alloc: impl Iterator, - free: impl Iterator, + regs: impl IntoIterator + Copy, masm: &mut M, mut f: F, ) -> T @@ -101,21 +113,32 @@ impl<'a> CodeGenContext<'a> { M: MacroAssembler, F: FnMut(&mut Self, &mut M) -> T, { - debug_assert!(free.size_hint().0 <= alloc.size_hint().0); - - for r in alloc { - self.reg(r, masm); + for r in regs { + self.reg(*r, masm); } let result = f(self, masm); - for r in free { - self.free_reg(r); + for r in regs { + self.free_reg(*r); } result } + /// Similar to [`Self::without`] but takes an optional, single register + /// as a paramter. + pub fn maybe_without1(&mut self, reg: Option, masm: &mut M, mut f: F) -> T + where + M: MacroAssembler, + F: FnMut(&mut Self, &mut M) -> T, + { + match reg { + Some(r) => self.without(&[r], masm, f), + None => f(self, masm), + } + } + /// Free the given register. pub fn free_reg(&mut self, reg: impl Into) { let reg: Reg = reg.into(); @@ -378,22 +401,6 @@ impl<'a> CodeGenContext<'a> { } } - /// Pops the value at the stack top and assigns it to the local at - /// the given index, returning the typed register holding the - /// source value. - pub fn set_local(&mut self, masm: &mut M, index: u32) -> TypedReg { - let slot = self - .frame - .get_local(index) - .unwrap_or_else(|| panic!("invalid local slot = {}", index)); - let size: OperandSize = slot.ty.into(); - let src = self.pop_to_reg(masm, None); - let addr = masm.local_address(&slot); - masm.store(RegImm::reg(src.reg), addr, size); - - src - } - /// Spill locals and registers to memory. // TODO optimize the spill range; // diff --git a/winch/codegen/src/codegen/env.rs b/winch/codegen/src/codegen/env.rs index 32f40cc791bb..5b3c9575f854 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -1,7 +1,4 @@ -use crate::{ - codegen::{BuiltinFunctions, OperandSize}, - CallingConvention, -}; +use crate::codegen::{BuiltinFunction, OperandSize}; use smallvec::{smallvec, SmallVec}; use std::collections::{ hash_map::Entry::{Occupied, Vacant}, @@ -32,6 +29,7 @@ pub struct TableData { /// A function callee. /// It categorizes how the callee should be treated /// when performing the call. +#[derive(Clone)] pub enum Callee { /// Locally defined function. Local(CalleeInfo), @@ -39,10 +37,38 @@ pub enum Callee { Import(CalleeInfo), /// Function reference. FuncRef(WasmFuncType), + /// A built-in function. + Builtin(BuiltinFunction), +} + +impl Callee { + /// Get the built-in function metadata. + /// + /// # Panics + /// This function panics if the [`Callee`] is not a built-in function. + pub fn get_builtin(&self) -> &BuiltinFunction { + match self { + Self::Builtin(f) => f, + _ => panic!(), + } + } + + /// Get the associated [`CalleeInfo`], if any. + /// + /// # Panics + /// This function panics if the [`Callee`] is not a local or imported + /// callee. + pub fn get_info(&self) -> &CalleeInfo { + match self { + Self::Local(i) | Self::Import(i) => i, + _ => panic!(), + } + } } /// Metadata about a function callee. Used by the code generation to /// emit function calls to local or imported functions. +#[derive(Clone)] pub struct CalleeInfo { /// The function type. pub ty: WasmFuncType, @@ -54,15 +80,13 @@ pub struct CalleeInfo { /// /// Contains all information about the module and runtime that is accessible to /// to a particular function during code generation. 
-pub struct FuncEnv<'a, P: PtrSize> { +pub struct FuncEnv<'a, 'b: 'a, 'c: 'b, P: PtrSize> { /// Offsets to the fields within the `VMContext` ptr. - pub vmoffsets: VMOffsets
<P>
, + pub vmoffsets: &'a VMOffsets
<P>
, /// Metadata about the translation process of a WebAssembly module. - pub translation: &'a ModuleTranslation<'a>, - /// Metadata about the builtin functions. - pub builtins: BuiltinFunctions, + pub translation: &'b ModuleTranslation<'c>, /// The module's function types. - pub types: &'a ModuleTypes, + pub types: &'b ModuleTypes, /// Track resolved table information. resolved_tables: HashMap, } @@ -73,32 +97,21 @@ pub fn ptr_type_from_ptr_size(size: u8) -> WasmType { .unwrap_or_else(|| unimplemented!("Support for non-64-bit architectures")) } -impl<'a, P: PtrSize> FuncEnv<'a, P> { +impl<'a, 'b, 'c, P: PtrSize> FuncEnv<'a, 'b, 'c, P> { /// Create a new function environment. pub fn new( - ptr: P, - translation: &'a ModuleTranslation, - types: &'a ModuleTypes, - call_conv: CallingConvention, + vmoffsets: &'a VMOffsets
<P>
, + translation: &'b ModuleTranslation<'c>, + types: &'b ModuleTypes, ) -> Self { - let vmoffsets = VMOffsets::new(ptr, &translation.module); - let size = vmoffsets.ptr.size(); - let builtins_base = vmoffsets.vmctx_builtin_functions(); Self { vmoffsets, translation, - builtins: BuiltinFunctions::new(size, call_conv, builtins_base), types, resolved_tables: HashMap::new(), } } - /// Returns a slice of types representing the caller and callee VMContext types. - pub(crate) fn vmctx_args_type(&self) -> [WasmType; 2] { - let ty = self.ptr_type(); - [ty, ty] - } - /// Derive the [`WasmType`] from the pointer size. pub(crate) fn ptr_type(&self) -> WasmType { ptr_type_from_ptr_size(self.ptr_size()) diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index f591605d3fb2..a96daccf46f7 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1,17 +1,13 @@ use crate::{ abi::{ABISig, ABI}, isa::reg::Reg, - masm::RegImm, - masm::{CmpKind, MacroAssembler, OperandSize, TrapCode}, + masm::{CmpKind, MacroAssembler, OperandSize, RegImm, TrapCode}, stack::{TypedReg, Val}, - CallingConvention, }; use anyhow::Result; use smallvec::SmallVec; use wasmparser::{BinaryReader, FuncValidator, Operator, ValidatorResources, VisitOperator}; -use wasmtime_environ::{ - PtrSize, TableIndex, TypeIndex, WasmFuncType, WasmHeapType, WasmType, FUNCREF_MASK, -}; +use wasmtime_environ::{PtrSize, TableIndex, TypeIndex, WasmHeapType, WasmType, FUNCREF_MASK}; mod context; pub(crate) use context::*; @@ -22,10 +18,10 @@ pub(crate) use call::*; mod control; pub(crate) use control::*; mod builtin; -pub(crate) use builtin::*; +pub use builtin::*; /// The code generation abstraction. -pub(crate) struct CodeGen<'a, M> +pub(crate) struct CodeGen<'a, 'b: 'a, 'c: 'b, M> where M: MacroAssembler, { @@ -33,10 +29,10 @@ where sig: ABISig, /// The code generation context. - pub context: CodeGenContext<'a>, + pub context: CodeGenContext<'a, 'b>, /// A reference to the function compilation environment. - pub env: FuncEnv<'a, M::Ptr>, + pub env: FuncEnv<'a, 'b, 'c, M::Ptr>, /// The MacroAssembler. pub masm: &'a mut M, @@ -47,14 +43,14 @@ where pub control_frames: SmallVec<[ControlStackFrame; 64]>, } -impl<'a, M> CodeGen<'a, M> +impl<'a, 'b, 'c, M> CodeGen<'a, 'b, 'c, M> where M: MacroAssembler, { pub fn new( masm: &'a mut M, - context: CodeGenContext<'a>, - env: FuncEnv<'a, M::Ptr>, + context: CodeGenContext<'a, 'b>, + env: FuncEnv<'a, 'b, 'c, M::Ptr>, sig: ABISig, ) -> Self { Self { @@ -234,7 +230,7 @@ where fn is_reachable(&self) -> bool; } - impl<'a, M: MacroAssembler> ReachableState for CodeGen<'a, M> { + impl<'a, 'b, 'c, M: MacroAssembler> ReachableState for CodeGen<'a, 'b, 'c, M> { fn is_reachable(&self) -> bool { self.context.reachable } @@ -252,90 +248,6 @@ where } } - /// Emit a function call to: - /// * A locally defined function. - /// * A function import. - /// * A funcref. 
- pub fn emit_call(&mut self, callee: Callee) { - let ptr_type = self.env.ptr_type(); - match callee { - Callee::Import(callee) => { - let mut params = Vec::with_capacity(callee.ty.params().len() + 2); - params.extend_from_slice(&self.env.vmctx_args_type()); - params.extend_from_slice(callee.ty.params()); - let sig = WasmFuncType::new(params.into(), callee.ty.returns().into()); - - let caller_vmctx = ::vmctx_reg(); - let callee_vmctx = self.context.any_gpr(self.masm); - let callee_vmctx_offset = self - .env - .vmoffsets - .vmctx_vmfunction_import_vmctx(callee.index); - let callee_vmctx_addr = self.masm.address_at_vmctx(callee_vmctx_offset); - self.masm.load_ptr(callee_vmctx_addr, callee_vmctx); - - let callee_body_offset = self - .env - .vmoffsets - .vmctx_vmfunction_import_wasm_call(callee.index); - let callee_addr = self.masm.address_at_vmctx(callee_body_offset); - - // Put the callee / caller vmctx at the start of the - // range of the stack so that they are used as first - // and second arguments. - let stack = &mut self.context.stack; - let location = stack.len() - (sig.params().len() - 2); - let values = [ - TypedReg::new(ptr_type, callee_vmctx).into(), - TypedReg::new(ptr_type, caller_vmctx).into(), - ] - .into_iter(); - self.context.stack.insert_many(location, values); - - let abi_sig = ::sig(&sig, &CallingConvention::Default); - FnCall::new(&abi_sig) - .save_live_registers(&mut self.context, self.masm) - .addr(self.masm, &mut self.context, callee_addr); - } - - Callee::Local(callee) => { - let abi_sig = ::sig(&callee.ty, &CallingConvention::Default); - FnCall::new(&abi_sig) - .save_live_registers(&mut self.context, self.masm) - .direct(self.masm, &mut self.context, callee.index); - } - - Callee::FuncRef(ty) => { - // Get type for the caller and callee VMContext. - let abi_sig = ::sig(&ty, &CallingConvention::Default); - // Pop the funcref pointer to a register and allocate a register to hold the - // address of the funcref. Since the callee is not addressed from a global non - // allocatable register (like the vmctx in the case of an import), we load the - // funcref to a register ensuring that it doesn't get assigned to a non-arg - // register. - let (funcref_ptr, funcref) = self.context.without::<_, M, _>( - abi_sig.param_regs(), - abi_sig.param_regs(), - self.masm, - |cx, masm| (cx.pop_to_reg(masm, None).into(), cx.any_gpr(masm)), - ); - self.masm.load( - self.masm.address_at_reg( - funcref_ptr, - self.env.vmoffsets.ptr.vm_func_ref_wasm_call().into(), - ), - funcref, - ptr_type.into(), - ); - self.context.free_reg(funcref_ptr); - - FnCall::new(&abi_sig) - .save_live_registers(&mut self.context, self.masm) - .reg(self.masm, &mut self.context, funcref); - } - }; - } - /// Emits a a series of instructions that will type check a function reference call. pub fn emit_typecheck_funcref(&mut self, funcref_ptr: Reg, type_index: TypeIndex) { let ptr_size: OperandSize = self.env.ptr_type().into(); @@ -417,70 +329,89 @@ where }); } - /// Emits a series of instructions to lazily initialize a function reference. 
- pub fn emit_lazy_init_funcref( - table_data: &TableData, - table_index: TableIndex, - ptr_type: WasmType, - context: &mut CodeGenContext, - masm: &mut M, - call: &mut FnCall, - callee: Reg, - ) { - let index = context.pop_to_reg(masm, None); - let elem_value: Reg = context.any_gpr(masm).into(); - let base = context.any_gpr(masm); - let elem_addr = masm.table_elem_address(index.into(), base, &table_data, context); - masm.load_ptr(elem_addr, elem_value); - - let defined = masm.get_label(); - let cont = masm.get_label(); - - // Preemptively move the table element address to the - // result register, to avoid conflicts at the control flow merge. - let result = call.abi_sig.result.result_reg().unwrap(); - masm.mov(elem_value.into(), result, ptr_type.into()); - - // Push the builtin function arguments to the stack. - context - .stack - .push(TypedReg::new(ptr_type, ::vmctx_reg()).into()); - context.stack.push(table_index.as_u32().try_into().unwrap()); - context.stack.push(index.into()); - - // `branch` in this case will perform a test of the given register, - // and jump to the defined branch if it's not zero. - masm.branch( + /// Pops the value at the stack top and assigns it to the local at + /// + /// the given index, returning the typed register holding the + /// source value. + pub fn emit_set_local(&mut self, addr: M::Address, size: OperandSize) -> TypedReg { + let src = self.context.pop_to_reg(self.masm, None); + self.masm.store(RegImm::reg(src.reg), addr, size); + + src + } + + pub fn emit_lazy_init_funcref(&mut self, table_index: TableIndex) { + let table_data = self.env.resolve_table_data(table_index); + let ptr_type = self.env.ptr_type(); + let builtin = self + .context + .builtins + .table_get_lazy_init_func_ref::(); + + // Request the builtin's result register and use it to hold the + // table element value. We preemptively request this register to + // avoid conflict at the control flow merge below. + // Requesting the result register is safe since we know ahead-of-time + // the builtin's signature. + let elem_value: Reg = self + .context + .reg(builtin.sig().result.result_reg().unwrap(), self.masm) + .into(); + + let index = self.context.pop_to_reg(self.masm, None); + let base = self.context.any_gpr(self.masm); + + let elem_addr = + self.masm + .table_elem_address(index.into(), base, &table_data, &mut self.context); + self.masm.load_ptr(elem_addr, elem_value); + // Free the register used as base, once we have loaded the element + // address into the element value register. + self.context.free_reg(base); + + let (defined, cont) = (self.masm.get_label(), self.masm.get_label()); + + // Push the built-int arguments to the stack. + self.context.stack.extend( + [ + TypedReg::new(ptr_type, ::vmctx_reg()).into(), + table_index.as_u32().try_into().unwrap(), + index.into(), + ] + .into_iter(), + ); + + self.masm.branch( CmpKind::Ne, elem_value.into(), elem_value, defined, ptr_type.into(), ); + // Free the element value register. + // This is safe since the FnCall::emit call below, will ensure + // that the result register is placed on the value stack. + self.context.free_reg(elem_value); + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(builtin.clone()) + }); - call.calculate_call_stack_space(context) - .reg(masm, context, callee); // We know the signature of the libcall in this case, so we assert that there's // one element in the stack and that it's the ABI signature's result register. 
- let top = context.stack.peek().unwrap(); + let top = self.context.stack.peek().unwrap(); let top = top.get_reg(); - debug_assert!(top.reg == result); - masm.jmp(cont); + debug_assert!(top.reg == elem_value); + self.masm.jmp(cont); // In the defined case, mask the funcref address in place, by peeking into the // last element of the value stack, which was pushed by the `indirect` function // call above. - masm.bind(defined); + self.masm.bind(defined); let imm = RegImm::i64(FUNCREF_MASK as i64); let dst = top.into(); - masm.and(dst, dst, imm, top.ty.into()); - - masm.bind(cont); - // The indirect call above, will take care of freeing the registers used as - // params. - // So we only free the params used to lazily initialize the func ref. - context.free_reg(base); - context.free_reg(elem_value); + self.masm.and(dst, dst, imm, top.ty.into()); + + self.masm.bind(cont); } } diff --git a/winch/codegen/src/frame/mod.rs b/winch/codegen/src/frame/mod.rs index 2979162648e9..9a0329856c8c 100644 --- a/winch/codegen/src/frame/mod.rs +++ b/winch/codegen/src/frame/mod.rs @@ -1,9 +1,12 @@ -use crate::abi::{align_to, ty_size, ABIArg, ABISig, LocalSlot, ABI}; +use crate::{ + abi::{align_to, ty_size, ABIArg, ABISig, LocalSlot, ABI}, + masm::MacroAssembler, +}; use anyhow::Result; use smallvec::SmallVec; use std::ops::Range; use wasmparser::{BinaryReader, FuncValidator, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, TypeConvert}; +use wasmtime_environ::{ModuleTranslation, TypeConvert, WasmType}; // TODO: // SpiderMonkey's implementation uses 16; @@ -115,6 +118,20 @@ impl Frame { self.locals.get(index as usize) } + /// Returns the address of the local at the given index. + /// + /// # Panics + /// This function panics if the the index is not associated to a local. + pub fn get_local_address( + &self, + index: u32, + masm: &mut M, + ) -> (WasmType, M::Address) { + self.get_local(index) + .map(|slot| (slot.ty, masm.local_address(slot))) + .unwrap_or_else(|| panic!("Invalid local slot: {}", index)) + } + fn compute_arg_slots(sig: &ABISig) -> Result<(Locals, u32)> { // Go over the function ABI-signature and // calculate the stack slots. 
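For illustration (not part of the patch): a simplified model of the lazy funcref initialization flow implemented by `emit_lazy_init_funcref` above. Initialized table entries carry a tag bit that is masked off with `FUNCREF_MASK`; null entries are filled in by the `table.get_lazy_init_func_ref` builtin. The table layout and the builtin closure below are placeholders.

```rust
const FUNCREF_MASK: usize = !1;

fn lazy_init(table: &mut [usize], index: usize, builtin: impl FnOnce() -> usize) -> usize {
    let elem = table[index];
    if elem != 0 {
        // Already initialized: mask off the tag bit to recover the pointer.
        elem & FUNCREF_MASK
    } else {
        // Uninitialized: the builtin computes the funcref, stores the tagged
        // value in the table, and returns the untagged pointer.
        let funcref = builtin();
        table[index] = funcref | 1;
        funcref
    }
}

fn main() {
    let mut table = vec![0usize; 4];
    let first = lazy_init(&mut table, 2, || 0x1000);
    let second = lazy_init(&mut table, 2, || unreachable!());
    assert_eq!(first, second);
    println!("funcref = {first:#x}");
}
```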
diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index f6032a9461ac..9f7b2533c983 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -258,7 +258,12 @@ impl Masm for MacroAssembler { todo!() } - fn float_round(&mut self, _mode: RoundingMode, _dst: Reg, _src: RegImm, _size: OperandSize) { + fn float_round( + &mut self, + _mode: RoundingMode, + _context: &mut CodeGenContext, + _size: OperandSize, + ) { todo!(); } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index 70c61ab9ca31..ba9fb049e9bd 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -8,7 +8,7 @@ use crate::{ regalloc::RegAlloc, regset::RegBitSet, stack::Stack, - TrampolineKind, + BuiltinFunctions, TrampolineKind, }; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; @@ -17,7 +17,7 @@ use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use masm::MacroAssembler as Aarch64Masm; use target_lexicon::Triple; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; +use wasmtime_environ::{ModuleTranslation, ModuleTypes, VMOffsets, WasmFuncType}; mod abi; mod address; @@ -85,11 +85,13 @@ impl TargetIsa for Aarch64 { fn compile_function( &self, sig: &WasmFuncType, - types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, + types: &ModuleTypes, + builtins: &mut BuiltinFunctions, validator: &mut FuncValidator, ) -> Result> { + let vmoffsets = VMOffsets::new(self.pointer_bytes(), &translation.module); let mut body = body.get_binary_reader(); let mut masm = Aarch64Masm::new(self.shared_flags.clone()); let stack = Stack::new(); @@ -105,13 +107,8 @@ impl TargetIsa for Aarch64 { // TODO: Add floating point bitmask let fpr = RegBitSet::float(0, 0, usize::try_from(MAX_FPR).unwrap()); let regalloc = RegAlloc::from(gpr, fpr); - let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let env = FuncEnv::new( - self.pointer_bytes(), - translation, - types, - self.wasmtime_call_conv(), - ); + let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets); + let env = FuncEnv::new(&vmoffsets, translation, types); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/isa/mod.rs b/winch/codegen/src/isa/mod.rs index 771fc556f9ac..53e821dde5f5 100644 --- a/winch/codegen/src/isa/mod.rs +++ b/winch/codegen/src/isa/mod.rs @@ -1,4 +1,4 @@ -use crate::TrampolineKind; +use crate::{BuiltinFunctions, TrampolineKind}; use anyhow::{anyhow, Result}; use core::fmt::Formatter; use cranelift_codegen::isa::{CallConv, IsaBuilder}; @@ -149,9 +149,10 @@ pub trait TargetIsa: Send + Sync { fn compile_function( &self, sig: &WasmFuncType, - types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, + types: &ModuleTypes, + builtins: &mut BuiltinFunctions, validator: &mut FuncValidator, ) -> Result>; diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index 6b94c5ad73c5..4f1fc068dd9a 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -6,7 +6,7 @@ use crate::{ }; use cranelift_codegen::{ entity::EntityRef, - ir::{types, ConstantPool, ExternalName, Opcode, TrapCode, UserExternalNameRef}, + ir::{types, ConstantPool, ExternalName, LibCall, Opcode, TrapCode, UserExternalNameRef}, 
isa::{ x64::{ args::{ @@ -876,6 +876,22 @@ impl Assembler { }); } + /// Emit a call to a well-known libcall. + pub fn call_with_lib(&mut self, lib: LibCall) { + let dest = ExternalName::LibCall(lib); + self.emit(Inst::CallKnown { + dest, + info: Box::new(CallInfo { + uses: smallvec![], + defs: smallvec![], + clobbers: Default::default(), + opcode: Opcode::Call, + callee_pop_size: 0, + callee_conv: CallConv::SystemV, + }), + }); + } + /// Emits a conditional jump to the given label. pub fn jmp_if(&mut self, cc: impl Into, taken: MachLabel) { self.emit(Inst::JmpIf { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 25264de83e5a..336d015c29f8 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -12,7 +12,7 @@ use crate::masm::{ use crate::{abi::ABI, masm::StackSlot, stack::TypedReg}; use crate::{ abi::{self, align_to, calculate_frame_adjustment, LocalSlot}, - codegen::{ptr_type_from_ptr_size, CodeGenContext, TableData}, + codegen::{ptr_type_from_ptr_size, Callee, CodeGenContext, FnCall, TableData}, stack::Val, }; use crate::{ @@ -257,6 +257,7 @@ impl Masm for MacroAssembler { match callee { CalleeKind::Indirect(reg) => self.asm.call_with_reg(reg), CalleeKind::Direct(idx) => self.asm.call_with_index(idx), + CalleeKind::Known(lib) => self.asm.call_with_lib(lib), }; total_stack } @@ -396,11 +397,42 @@ impl Masm for MacroAssembler { self.asm.and_rr(scratch_xmm, dst, size); } - fn float_round(&mut self, mode: RoundingMode, dst: Reg, src: RegImm, size: OperandSize) { + fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize) { if self.flags.has_sse41() { - self.asm.rounds(src.get_reg().unwrap(), dst, mode, size); + let src = context.pop_to_reg(self, None); + self.asm.rounds(src.into(), src.into(), mode, size); + context.stack.push(src.into()); } else { - todo!("libcall fallback for rounding is not implemented") + FnCall::emit::(self, context, |context| { + let b = match (&mode, size) { + (RoundingMode::Up, OperandSize::S32) => { + context.builtins.ceil_f32::<::ABI>() + } + (RoundingMode::Up, OperandSize::S64) => { + context.builtins.ceil_f64::<::ABI>() + } + (RoundingMode::Down, OperandSize::S32) => { + context.builtins.floor_f32::<::ABI>() + } + (RoundingMode::Down, OperandSize::S64) => { + context.builtins.floor_f64::<::ABI>() + } + (RoundingMode::Nearest, OperandSize::S32) => { + context.builtins.nearest_f32::<::ABI>() + } + (RoundingMode::Nearest, OperandSize::S64) => { + context.builtins.nearest_f64::<::ABI>() + } + (RoundingMode::Zero, OperandSize::S32) => { + context.builtins.trunc_f32::<::ABI>() + } + (RoundingMode::Zero, OperandSize::S64) => { + context.builtins.trunc_f64::<::ABI>() + } + (_, _) => unreachable!(), + }; + Callee::Builtin(b) + }) } } diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 4703f7f20edb..1d38ed4a1703 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -1,6 +1,6 @@ use crate::{ abi::ABI, - codegen::{CodeGen, CodeGenContext, FuncEnv}, + codegen::{BuiltinFunctions, CodeGen, CodeGenContext, FuncEnv}, }; use crate::frame::{DefinedLocals, Frame}; @@ -19,7 +19,7 @@ use cranelift_codegen::{isa::x64::settings as x64_settings, Final, MachBufferFin use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use target_lexicon::Triple; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; +use 
wasmtime_environ::{ModuleTranslation, ModuleTypes, VMOffsets, WasmFuncType}; use self::regs::{ALL_FPR, ALL_GPR, MAX_FPR, MAX_GPR, NON_ALLOCATABLE_FPR, NON_ALLOCATABLE_GPR}; @@ -89,12 +89,15 @@ impl TargetIsa for X64 { fn compile_function( &self, sig: &WasmFuncType, - types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, + types: &ModuleTypes, + builtins: &mut BuiltinFunctions, validator: &mut FuncValidator, ) -> Result> { let pointer_bytes = self.pointer_bytes(); + let vmoffsets = VMOffsets::new(pointer_bytes, &translation.module); + let mut body = body.get_binary_reader(); let mut masm = X64Masm::new( pointer_bytes, @@ -118,8 +121,8 @@ impl TargetIsa for X64 { ); let regalloc = RegAlloc::from(gpr, fpr); - let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let env = FuncEnv::new(pointer_bytes, translation, types, self.wasmtime_call_conv()); + let env = FuncEnv::new(&vmoffsets, translation, types); + let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/lib.rs b/winch/codegen/src/lib.rs index f68aa5ab3a2a..12656ff4a265 100644 --- a/winch/codegen/src/lib.rs +++ b/winch/codegen/src/lib.rs @@ -7,7 +7,7 @@ #![cfg_attr(not(feature = "all-arch"), allow(dead_code))] mod abi; -pub use codegen::FuncEnv; +pub use codegen::{BuiltinFunctions, FuncEnv}; mod codegen; mod frame; pub mod isa; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index ab032cfb7253..d7508b5bbed3 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1,7 +1,7 @@ use crate::abi::{self, align_to, LocalSlot}; use crate::codegen::{CodeGenContext, TableData}; use crate::isa::reg::Reg; -use cranelift_codegen::{Final, MachBufferFinalized, MachLabel}; +use cranelift_codegen::{ir::LibCall, Final, MachBufferFinalized, MachLabel}; use std::{fmt::Debug, ops::Range}; use wasmtime_environ::PtrSize; @@ -183,12 +183,14 @@ impl Imm { } } -#[derive(Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) enum CalleeKind { /// A function call to a raw address. Indirect(Reg), /// A function call to a local function. Direct(u32), + /// Call to a well known LibCall. + Known(LibCall), } impl CalleeKind { @@ -201,6 +203,11 @@ impl CalleeKind { pub fn direct(index: u32) -> Self { Self::Direct(index) } + + /// Creates a known callee kind from a libcall. + pub fn known(call: LibCall) -> Self { + Self::Known(call) + } } impl RegImm { @@ -232,15 +239,6 @@ impl RegImm { pub fn f64(bits: u64) -> Self { RegImm::Imm(Imm::f64(bits)) } - - /// Get the underlying register of the operand, - /// if it is one. - pub fn get_reg(&self) -> Option { - match self { - Self::Reg(r) => Some(*r), - _ => None, - } - } } impl From for RegImm { @@ -381,7 +379,7 @@ pub(crate) trait MacroAssembler { fn float_neg(&mut self, dst: Reg, size: OperandSize); /// Perform a floating point floor operation. - fn float_round(&mut self, mode: RoundingMode, dst: Reg, src: RegImm, size: OperandSize); + fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize); /// Perform logical and operation. 
fn and(&mut self, dst: Reg, lhs: Reg, rhs: RegImm, size: OperandSize); diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index 2a993ae95e78..1fc61bdee6dd 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -1,6 +1,5 @@ use crate::{isa::reg::Reg, masm::StackSlot}; use std::collections::VecDeque; -use std::ops::RangeBounds; use wasmparser::{Ieee32, Ieee64}; use wasmtime_environ::WasmType; @@ -323,18 +322,6 @@ impl Stack { pub fn inner_mut(&mut self) -> &mut VecDeque { &mut self.inner } - - /// Calculates size in bytes of memory entries within the specified range of - /// the stack. - pub fn sizeof(&self, range: R) -> u32 - where - R: RangeBounds, - { - self.inner.range(range).fold(0, |acc, v| match v { - Val::Memory(m) => acc + m.slot.size, - _ => acc, - }) - } } #[cfg(test)] diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 40a290666920..287059cc8d5e 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -5,7 +5,7 @@ //! machine code emitter. use crate::abi::ABI; -use crate::codegen::{control_index, CodeGen, ControlStackFrame, FnCall}; +use crate::codegen::{control_index, Callee, CodeGen, ControlStackFrame, FnCall}; use crate::masm::{ CmpKind, DivKind, MacroAssembler, OperandSize, RegImm, RemKind, RoundingMode, ShiftKind, }; @@ -146,7 +146,7 @@ macro_rules! def_unsupported { (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } -impl<'a, M> VisitOperator<'a> for CodeGen<'a, M> +impl<'a, 'b, 'c, M> VisitOperator<'a> for CodeGen<'a, 'b, 'c, M> where M: MacroAssembler, { @@ -197,59 +197,43 @@ where } fn visit_f32_floor(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Down, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Down, &mut self.context, OperandSize::S32); } fn visit_f64_floor(&mut self) { - self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Down, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Down, &mut self.context, OperandSize::S64); } fn visit_f32_ceil(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Up, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Up, &mut self.context, OperandSize::S32); } fn visit_f64_ceil(&mut self) { - self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Up, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Up, &mut self.context, OperandSize::S64); } fn visit_f32_nearest(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Nearest, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S32); } fn visit_f64_nearest(&mut self) { - self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Nearest, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S64); } fn visit_f32_trunc(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Zero, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Zero, &mut self.context, OperandSize::S32); } fn visit_f64_trunc(&mut self) { 
- self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Zero, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Zero, &mut self.context, OperandSize::S64); } fn visit_i32_add(&mut self) { @@ -627,44 +611,25 @@ where // TODO: verify the case where the target local is on the stack. fn visit_local_set(&mut self, index: u32) { - let src = self.context.set_local(self.masm, index); + let (ty, slot) = self.context.frame.get_local_address(index, self.masm); + let src = self.emit_set_local(slot, ty.into()); self.context.free_reg(src); } fn visit_call(&mut self, index: u32) { let callee = self.env.callee_from_index(FuncIndex::from_u32(index)); - self.emit_call(callee); + FnCall::emit::(self.masm, &mut self.context, |_| callee.clone()); } fn visit_call_indirect(&mut self, type_index: u32, table_index: u32, _: u8) { let type_index = TypeIndex::from_u32(type_index); let table_index = TableIndex::from_u32(table_index); - let table_data = self.env.resolve_table_data(table_index); - let ptr_type = self.env.ptr_type(); - - let builtin = self - .env - .builtins - .table_get_lazy_init_func_ref::(); - FnCall::new(&builtin.sig).with_lib( - self.masm, - &mut self.context, - &builtin, - |cx, masm, call, callee| { - CodeGen::emit_lazy_init_funcref( - &table_data, - table_index, - ptr_type, - cx, - masm, - call, - callee, - ); - }, - ); + self.emit_lazy_init_funcref(table_index); // Perform the indirect call. + // This code assumes that [`Self::emit_lazy_init_funcref`] will + // push the funcref to the value stack. match self.env.translation.module.table_plans[table_index].style { TableStyle::CallerChecksSignature => { let funcref_ptr = self.context.stack.peek().map(|v| v.get_reg()).unwrap(); @@ -674,132 +639,96 @@ where } } - // Perform call indirect. - // `emit_call` expects the callee to be on the stack. Delaying the - // computation of the callee address reduces register pressure. - self.emit_call(self.env.funcref(type_index)); + FnCall::emit::(self.masm, &mut self.context, |_| { + self.env.funcref(type_index) + }) } fn visit_table_init(&mut self, elem: u32, table: u32) { let ptr_type = self.env.ptr_type(); - let table_init = self.env.builtins.table_init::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); - FnCall::new(&table_init.sig).with_lib( - self.masm, - &mut self.context, - &table_init, - |cx, masm, call, callee| { - // table.init requires at least 3 elements on the value stack. - debug_assert!(cx.stack.len() >= 3); - let extra_args = [ - vmctx.into(), - table.try_into().unwrap(), - elem.try_into().unwrap(), - ]; - let at = cx.stack.len() - 3; - cx.stack.insert_many(at, extra_args); - // Finalize the call. 
- call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, + debug_assert!(self.context.stack.len() >= 3); + let at = self.context.stack.len() - 3; + + self.context.stack.insert_many( + at, + [ + vmctx.into(), + table.try_into().unwrap(), + elem.try_into().unwrap(), + ], ); + FnCall::emit::(self.masm, &mut self.context, |cx| { + Callee::Builtin(cx.builtins.table_init::()) + }); } fn visit_table_copy(&mut self, dst: u32, src: u32) { let ptr_type = self.env.ptr_type(); - let table_copy = self.env.builtins.table_copy::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); + debug_assert!(self.context.stack.len() >= 3); + let at = self.context.stack.len() - 3; + self.context.stack.insert_many( + at, + [ + vmctx.into(), + dst.try_into().unwrap(), + src.try_into().unwrap(), + ], + ); - FnCall::new(&table_copy.sig).with_lib( - self.masm, - &mut self.context, - &table_copy, - |cx, masm, call, callee| { - // table.copy requires at least 3 elemenents in the value stack. - debug_assert!(cx.stack.len() >= 3); - let at = cx.stack.len() - 3; - cx.stack.insert_many( - at, - [ - vmctx.into(), - dst.try_into().unwrap(), - src.try_into().unwrap(), - ], - ); - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ) + FnCall::emit::(self.masm, &mut self.context, |context| { + Callee::Builtin(context.builtins.table_copy::()) + }); } fn visit_table_get(&mut self, table: u32) { - let ptr_type = self.env.ptr_type(); let table_index = TableIndex::from_u32(table); - let table_data = self.env.resolve_table_data(table_index); let plan = self.env.table_plan(table_index); let heap_type = plan.table.wasm_ty.heap_type; - let style = plan.style.clone(); - let table_get = self - .env - .builtins - .table_get_lazy_init_func_ref::(); + let style = &plan.style; - FnCall::new(&table_get.sig).with_lib( - self.masm, - &mut self.context, - &table_get, - |cx, masm, call, callee| { - match heap_type { - WasmHeapType::Func => match style { - TableStyle::CallerChecksSignature => { - CodeGen::emit_lazy_init_funcref( - &table_data, - table_index, - ptr_type, - cx, - masm, - call, - callee, - ); - } - }, - t => unimplemented!("Support for WasmHeapType: {t}"), - }; + match heap_type { + WasmHeapType::Func => match style { + TableStyle::CallerChecksSignature => self.emit_lazy_init_funcref(table_index), }, - ); + t => unimplemented!("Support for WasmHeapType: {t}"), + } } fn visit_table_grow(&mut self, table: u32) { let ptr_type = self.env.ptr_type(); + let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); let table_index = TableIndex::from_u32(table); let table_plan = self.env.table_plan(table_index); - let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); let builtin = match table_plan.table.wasm_ty.heap_type { - WasmHeapType::Func => self.env.builtins.table_grow_func_ref::(), + WasmHeapType::Func => self + .context + .builtins + .table_grow_func_ref::(), ty => unimplemented!("Support for HeapType: {ty}"), }; - FnCall::new(&builtin.sig).with_lib( - self.masm, - &mut self.context, - &builtin, - |cx, masm, call, callee| { - let len = cx.stack.len(); - // table.grow requires at least 2 elements on the value stack. - debug_assert!(len >= 2); - // The table_grow builtin expects the parameters in a different - // order. - // The value stack at this point should contain: - // [ init_value | delta ] (stack top) - // but the builtin function expects the init value as the last - // argument. 
- cx.stack.inner_mut().swap(len - 1, len - 2); - let at = len - 2; - cx.stack - .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); - - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ); + let len = self.context.stack.len(); + // table.grow` requires at least 2 elements on the value stack. + debug_assert!(len >= 2); + let at = len - 2; + + // The table_grow builtin expects the parameters in a different + // order. + // The value stack at this point should contain: + // [ init_value | delta ] (stack top) + // but the builtin function expects the init value as the last + // argument. + self.context.stack.inner_mut().swap(len - 1, len - 2); + self.context + .stack + .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); + + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(builtin.clone()) + }); } fn visit_table_size(&mut self, table: u32) { @@ -814,24 +743,22 @@ where let table_index = TableIndex::from_u32(table); let table_plan = self.env.table_plan(table_index); let builtin = match table_plan.table.wasm_ty.heap_type { - WasmHeapType::Func => self.env.builtins.table_fill_func_ref::(), + WasmHeapType::Func => self + .context + .builtins + .table_fill_func_ref::(), ty => unimplemented!("Support for heap type: {ty}"), }; - FnCall::new(&builtin.sig).with_lib( - self.masm, - &mut self.context, - &builtin, - |cx, masm, call, callee| { - // table.fill requires at least 3 values on the value stack. - debug_assert!(cx.stack.len() >= 3); - let at = cx.stack.len() - 3; - cx.stack - .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); - - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ); + let len = self.context.stack.len(); + debug_assert!(len >= 3); + let at = len - 3; + self.context + .stack + .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(builtin.clone()) + }) } fn visit_table_set(&mut self, table: u32) { @@ -873,18 +800,14 @@ where fn visit_elem_drop(&mut self, index: u32) { let ptr_type = self.env.ptr_type(); - let elem_drop = self.env.builtins.elem_drop::(); + let elem_drop = self.context.builtins.elem_drop::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); - - FnCall::new(&elem_drop.sig).with_lib( - self.masm, - &mut self.context, - &elem_drop, - |cx, masm, call, callee| { - cx.stack.extend([vmctx.into(), index.try_into().unwrap()]); - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ); + self.context + .stack + .extend([vmctx.into(), index.try_into().unwrap()]); + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(elem_drop.clone()) + }); } fn visit_nop(&mut self) {} @@ -939,9 +862,8 @@ where let frame = &mut self.control_frames[index]; frame.set_as_target(); let result = frame.as_target_result(); - let top = self.context.without::( - result.regs(), - result.regs(), + let top = self.context.maybe_without1::( + result.result_reg(), self.masm, |ctx, masm| ctx.pop_to_reg(masm, None), ); @@ -967,9 +889,8 @@ where let default_index = control_index(targets.default(), self.control_frames.len()); let default_result = self.control_frames[default_index].as_target_result(); - let (index, tmp) = self.context.without::<(TypedReg, _), M, _>( - default_result.regs(), - default_result.regs(), + let (index, tmp) = self.context.maybe_without1::<(TypedReg, _), M, _>( + default_result.result_reg(), self.masm, |cx, masm| (cx.pop_to_reg(masm, None), cx.any_gpr(masm)), ); @@ -1023,7 +944,8 @@ where } fn 
visit_local_tee(&mut self, index: u32) { - let typed_reg = self.context.set_local(self.masm, index); + let (ty, slot) = self.context.frame.get_local_address(index, self.masm); + let typed_reg = self.emit_set_local(slot, ty.into()); self.context.stack.push(typed_reg.into()); } @@ -1075,7 +997,7 @@ where wasmparser::for_each_operator!(def_unsupported); } -impl<'a, M> CodeGen<'a, M> +impl<'a, 'b, 'c, M> CodeGen<'a, 'b, 'c, M> where M: MacroAssembler, { diff --git a/winch/filetests/filetests/x64/call_indirect/call_indirect.wat b/winch/filetests/filetests/x64/call_indirect/call_indirect.wat index 2719ef8f9009..68b768a633a4 100644 --- a/winch/filetests/filetests/x64/call_indirect/call_indirect.wat +++ b/winch/filetests/filetests/x64/call_indirect/call_indirect.wat @@ -41,96 +41,103 @@ ;; 21: 85c0 test eax, eax ;; 23: 0f840a000000 je 0x33 ;; 29: b801000000 mov eax, 1 -;; 2e: e925010000 jmp 0x158 +;; 2e: e92e010000 jmp 0x161 ;; 33: 8b44240c mov eax, dword ptr [rsp + 0xc] ;; 37: 83e802 sub eax, 2 ;; 3a: 50 push rax -;; 3b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 3f: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 43: bb00000000 mov ebx, 0 -;; 48: 4d89f1 mov r9, r14 -;; 4b: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 4f: 4439d3 cmp ebx, r10d -;; 52: 0f8306010000 jae 0x15e -;; 58: 4189db mov r11d, ebx -;; 5b: 4d6bdb08 imul r11, r11, 8 -;; 5f: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 63: 4d89cc mov r12, r9 -;; 66: 4d01d9 add r9, r11 -;; 69: 4439d3 cmp ebx, r10d -;; 6c: 4d0f43cc cmovae r9, r12 -;; 70: 4d8b01 mov r8, qword ptr [r9] -;; 73: 4c89c0 mov rax, r8 -;; 76: 4d85c0 test r8, r8 -;; 79: 0f8519000000 jne 0x98 -;; 7f: 4883ec08 sub rsp, 8 -;; 83: 4c89f7 mov rdi, r14 -;; 86: be00000000 mov esi, 0 -;; 8b: 89da mov edx, ebx -;; 8d: ffd1 call rcx -;; 8f: 4883c408 add rsp, 8 -;; 93: e904000000 jmp 0x9c -;; 98: 4883e0fe and rax, 0xfffffffffffffffe -;; 9c: 4885c0 test rax, rax -;; 9f: 0f84bb000000 je 0x160 -;; a5: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] -;; a9: 418b0b mov ecx, dword ptr [r11] -;; ac: 8b5018 mov edx, dword ptr [rax + 0x18] -;; af: 39d1 cmp ecx, edx -;; b1: 0f85ab000000 jne 0x162 -;; b7: 488b4810 mov rcx, qword ptr [rax + 0x10] -;; bb: 4883ec08 sub rsp, 8 -;; bf: 8b7c2408 mov edi, dword ptr [rsp + 8] -;; c3: ffd1 call rcx -;; c5: 4883c410 add rsp, 0x10 -;; c9: 8b4c240c mov ecx, dword ptr [rsp + 0xc] -;; cd: 83e901 sub ecx, 1 -;; d0: 50 push rax -;; d1: 51 push rcx -;; d2: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; d6: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; da: bb00000000 mov ebx, 0 -;; df: 4d89f1 mov r9, r14 -;; e2: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; e6: 4439d3 cmp ebx, r10d -;; e9: 0f8375000000 jae 0x164 -;; ef: 4189db mov r11d, ebx -;; f2: 4d6bdb08 imul r11, r11, 8 -;; f6: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; fa: 4d89cc mov r12, r9 -;; fd: 4d01d9 add r9, r11 -;; 100: 4439d3 cmp ebx, r10d -;; 103: 4d0f43cc cmovae r9, r12 -;; 107: 4d8b01 mov r8, qword ptr [r9] -;; 10a: 4c89c0 mov rax, r8 -;; 10d: 4d85c0 test r8, r8 -;; 110: 0f8511000000 jne 0x127 -;; 116: 4c89f7 mov rdi, r14 -;; 119: be00000000 mov esi, 0 -;; 11e: 89da mov edx, ebx -;; 120: ffd1 call rcx -;; 122: e904000000 jmp 0x12b -;; 127: 4883e0fe and rax, 0xfffffffffffffffe -;; 12b: 4885c0 test rax, rax -;; 12e: 0f8432000000 je 0x166 -;; 134: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] -;; 138: 418b0b mov ecx, dword ptr [r11] -;; 13b: 8b5018 mov edx, dword ptr [rax + 0x18] -;; 13e: 39d1 cmp ecx, edx -;; 140: 0f8522000000 jne 0x168 -;; 146: 488b4810 mov rcx, qword ptr [rax + 0x10] -;; 14a: 8b3c24 mov edi, dword 
ptr [rsp] -;; 14d: ffd1 call rcx -;; 14f: 4883c408 add rsp, 8 -;; 153: 59 pop rcx -;; 154: 01c1 add ecx, eax -;; 156: 89c8 mov eax, ecx -;; 158: 4883c410 add rsp, 0x10 -;; 15c: 5d pop rbp -;; 15d: c3 ret -;; 15e: 0f0b ud2 -;; 160: 0f0b ud2 -;; 162: 0f0b ud2 -;; 164: 0f0b ud2 -;; 166: 0f0b ud2 -;; 168: 0f0b ud2 +;; 3b: b900000000 mov ecx, 0 +;; 40: 4c89f2 mov rdx, r14 +;; 43: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 46: 39d9 cmp ecx, ebx +;; 48: 0f8319010000 jae 0x167 +;; 4e: 4189cb mov r11d, ecx +;; 51: 4d6bdb08 imul r11, r11, 8 +;; 55: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 59: 4889d6 mov rsi, rdx +;; 5c: 4c01da add rdx, r11 +;; 5f: 39d9 cmp ecx, ebx +;; 61: 480f43d6 cmovae rdx, rsi +;; 65: 488b02 mov rax, qword ptr [rdx] +;; 68: 4885c0 test rax, rax +;; 6b: 0f8528000000 jne 0x99 +;; 71: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 75: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 79: 4156 push r14 +;; 7b: 51 push rcx +;; 7c: 4883ec08 sub rsp, 8 +;; 80: 488b7c2410 mov rdi, qword ptr [rsp + 0x10] +;; 85: be00000000 mov esi, 0 +;; 8a: 8b542408 mov edx, dword ptr [rsp + 8] +;; 8e: ffd3 call rbx +;; 90: 4883c418 add rsp, 0x18 +;; 94: e904000000 jmp 0x9d +;; 99: 4883e0fe and rax, 0xfffffffffffffffe +;; 9d: 4885c0 test rax, rax +;; a0: 0f84c3000000 je 0x169 +;; a6: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] +;; aa: 418b0b mov ecx, dword ptr [r11] +;; ad: 8b5018 mov edx, dword ptr [rax + 0x18] +;; b0: 39d1 cmp ecx, edx +;; b2: 0f85b3000000 jne 0x16b +;; b8: 50 push rax +;; b9: 59 pop rcx +;; ba: 488b5110 mov rdx, qword ptr [rcx + 0x10] +;; be: 4883ec08 sub rsp, 8 +;; c2: 8b7c2408 mov edi, dword ptr [rsp + 8] +;; c6: ffd2 call rdx +;; c8: 4883c410 add rsp, 0x10 +;; cc: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; d0: 83e901 sub ecx, 1 +;; d3: 50 push rax +;; d4: 51 push rcx +;; d5: b900000000 mov ecx, 0 +;; da: 4c89f2 mov rdx, r14 +;; dd: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; e0: 39d9 cmp ecx, ebx +;; e2: 0f8385000000 jae 0x16d +;; e8: 4189cb mov r11d, ecx +;; eb: 4d6bdb08 imul r11, r11, 8 +;; ef: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; f3: 4889d6 mov rsi, rdx +;; f6: 4c01da add rdx, r11 +;; f9: 39d9 cmp ecx, ebx +;; fb: 480f43d6 cmovae rdx, rsi +;; ff: 488b02 mov rax, qword ptr [rdx] +;; 102: 4885c0 test rax, rax +;; 105: 0f8523000000 jne 0x12e +;; 10b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 10f: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 113: 4156 push r14 +;; 115: 51 push rcx +;; 116: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 11b: be00000000 mov esi, 0 +;; 120: 8b1424 mov edx, dword ptr [rsp] +;; 123: ffd3 call rbx +;; 125: 4883c410 add rsp, 0x10 +;; 129: e904000000 jmp 0x132 +;; 12e: 4883e0fe and rax, 0xfffffffffffffffe +;; 132: 4885c0 test rax, rax +;; 135: 0f8434000000 je 0x16f +;; 13b: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] +;; 13f: 418b0b mov ecx, dword ptr [r11] +;; 142: 8b5018 mov edx, dword ptr [rax + 0x18] +;; 145: 39d1 cmp ecx, edx +;; 147: 0f8524000000 jne 0x171 +;; 14d: 50 push rax +;; 14e: 59 pop rcx +;; 14f: 488b5110 mov rdx, qword ptr [rcx + 0x10] +;; 153: 8b3c24 mov edi, dword ptr [rsp] +;; 156: ffd2 call rdx +;; 158: 4883c408 add rsp, 8 +;; 15c: 59 pop rcx +;; 15d: 01c1 add ecx, eax +;; 15f: 89c8 mov eax, ecx +;; 161: 4883c410 add rsp, 0x10 +;; 165: 5d pop rbp +;; 166: c3 ret +;; 167: 0f0b ud2 +;; 169: 0f0b ud2 +;; 16b: 0f0b ud2 +;; 16d: 0f0b ud2 +;; 16f: 0f0b ud2 +;; 171: 0f0b ud2 diff --git a/winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat b/winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat new file mode 100644 index 
000000000000..ddc23ace0472 --- /dev/null +++ b/winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.ceil) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat b/winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat new file mode 100644 index 000000000000..905d073e5a0d --- /dev/null +++ b/winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.floor) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat b/winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat new file mode 100644 index 000000000000..3641f869c46f --- /dev/null +++ b/winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.nearest) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat b/winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat new file mode 100644 index 000000000000..58a3680ea836 --- /dev/null +++ b/winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.trunc) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat b/winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat new file mode 100644 index 000000000000..c08fae754a1e --- /dev/null +++ b/winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat @@ -0,0 +1,18 @@ +;;! 
target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.ceil) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat b/winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat new file mode 100644 index 000000000000..01b6e9517c0f --- /dev/null +++ b/winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.floor) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat b/winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat new file mode 100644 index 000000000000..7e957c10612b --- /dev/null +++ b/winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.nearest) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat b/winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat new file mode 100644 index 000000000000..00d68f5ae80f --- /dev/null +++ b/winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat @@ -0,0 +1,18 @@ +;;! 
target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.trunc) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/table/fill.wat b/winch/filetests/filetests/x64/table/fill.wat index 0b7960d823ca..2a61212958ae 100644 --- a/winch/filetests/filetests/x64/table/fill.wat +++ b/winch/filetests/filetests/x64/table/fill.wat @@ -50,50 +50,45 @@ ;; c: 89742418 mov dword ptr [rsp + 0x18], esi ;; 10: 89542414 mov dword ptr [rsp + 0x14], edx ;; 14: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 19: 448b5c2418 mov r11d, dword ptr [rsp + 0x18] -;; 1e: 4153 push r11 -;; 20: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 24: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 28: 5b pop rbx -;; 29: 4d89f1 mov r9, r14 -;; 2c: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 30: 4439d3 cmp ebx, r10d -;; 33: 0f8384000000 jae 0xbd -;; 39: 4189db mov r11d, ebx -;; 3c: 4d6bdb08 imul r11, r11, 8 -;; 40: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 44: 4d89cc mov r12, r9 -;; 47: 4d01d9 add r9, r11 -;; 4a: 4439d3 cmp ebx, r10d -;; 4d: 4d0f43cc cmovae r9, r12 -;; 51: 4d8b01 mov r8, qword ptr [r9] -;; 54: 4c89c0 mov rax, r8 -;; 57: 4d85c0 test r8, r8 -;; 5a: 0f8511000000 jne 0x71 -;; 60: 4c89f7 mov rdi, r14 -;; 63: be00000000 mov esi, 0 -;; 68: 89da mov edx, ebx -;; 6a: ffd1 call rcx +;; 19: 8b4c2418 mov ecx, dword ptr [rsp + 0x18] +;; 1d: 4c89f2 mov rdx, r14 +;; 20: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 23: 39d9 cmp ecx, ebx +;; 25: 0f8381000000 jae 0xac +;; 2b: 4189cb mov r11d, ecx +;; 2e: 4d6bdb08 imul r11, r11, 8 +;; 32: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 36: 4889d6 mov rsi, rdx +;; 39: 4c01da add rdx, r11 +;; 3c: 39d9 cmp ecx, ebx +;; 3e: 480f43d6 cmovae rdx, rsi +;; 42: 488b02 mov rax, qword ptr [rdx] +;; 45: 4885c0 test rax, rax +;; 48: 0f8523000000 jne 0x71 +;; 4e: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 52: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 56: 4156 push r14 +;; 58: 51 push rcx +;; 59: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 5e: be00000000 mov esi, 0 +;; 63: 8b1424 mov edx, dword ptr [rsp] +;; 66: ffd3 call rbx +;; 68: 4883c410 add rsp, 0x10 ;; 6c: e904000000 jmp 0x75 ;; 71: 4883e0fe and rax, 0xfffffffffffffffe ;; 75: 488944240c mov qword ptr [rsp + 0xc], rax -;; 7a: 448b5c241c mov r11d, dword ptr [rsp + 0x1c] -;; 7f: 4153 push r11 -;; 81: 4c8b5c2414 mov r11, qword ptr [rsp + 0x14] -;; 86: 4153 push r11 -;; 88: 448b5c2424 mov r11d, dword ptr [rsp + 0x24] -;; 8d: 4153 push r11 -;; 8f: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 93: 498b4368 mov rax, qword ptr [r11 + 0x68] -;; 97: 4883ec08 sub rsp, 8 -;; 9b: 4c89f7 mov rdi, r14 -;; 9e: be01000000 mov esi, 1 -;; a3: 8b542418 mov edx, dword ptr [rsp + 0x18] -;; a7: 488b4c2410 mov rcx, qword ptr [rsp + 0x10] -;; ac: 448b442408 mov r8d, dword ptr [rsp + 8] -;; b1: ffd0 call rax -;; b3: 4883c420 add rsp, 0x20 -;; b7: 4883c420 add rsp, 0x20 -;; bb: 5d pop rbp -;; bc: c3 ret -;; bd: 0f0b ud2 +;; 7a: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 7e: 498b4368 mov rax, qword ptr [r11 + 0x68] +;; 82: 4156 push r14 +;; 84: 4883ec08 sub rsp, 8 +;; 88: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 8d: be01000000 mov esi, 1 +;; 92: 8b54242c mov edx, dword ptr [rsp + 0x2c] +;; 96: 488b4c241c mov rcx, qword ptr [rsp + 0x1c] 
+;; 9b: 448b442424 mov r8d, dword ptr [rsp + 0x24] +;; a0: ffd0 call rax +;; a2: 4883c410 add rsp, 0x10 +;; a6: 4883c420 add rsp, 0x20 +;; aa: 5d pop rbp +;; ab: c3 ret +;; ac: 0f0b ud2 diff --git a/winch/filetests/filetests/x64/table/get.wat b/winch/filetests/filetests/x64/table/get.wat index 2c28b304d48d..58d6b0fb9dfd 100644 --- a/winch/filetests/filetests/x64/table/get.wat +++ b/winch/filetests/filetests/x64/table/get.wat @@ -22,30 +22,30 @@ ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi ;; c: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 11: 448b5c240c mov r11d, dword ptr [rsp + 0xc] -;; 16: 4153 push r11 -;; 18: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 1c: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 20: 5b pop rbx -;; 21: 4d89f1 mov r9, r14 -;; 24: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 28: 4439d3 cmp ebx, r10d -;; 2b: 0f8342000000 jae 0x73 -;; 31: 4189db mov r11d, ebx -;; 34: 4d6bdb08 imul r11, r11, 8 -;; 38: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 3c: 4d89cc mov r12, r9 -;; 3f: 4d01d9 add r9, r11 -;; 42: 4439d3 cmp ebx, r10d -;; 45: 4d0f43cc cmovae r9, r12 -;; 49: 4d8b01 mov r8, qword ptr [r9] -;; 4c: 4c89c0 mov rax, r8 -;; 4f: 4d85c0 test r8, r8 -;; 52: 0f8511000000 jne 0x69 -;; 58: 4c89f7 mov rdi, r14 -;; 5b: be00000000 mov esi, 0 -;; 60: 89da mov edx, ebx -;; 62: ffd1 call rcx +;; 11: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 15: 4c89f2 mov rdx, r14 +;; 18: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 1b: 39d9 cmp ecx, ebx +;; 1d: 0f8350000000 jae 0x73 +;; 23: 4189cb mov r11d, ecx +;; 26: 4d6bdb08 imul r11, r11, 8 +;; 2a: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 2e: 4889d6 mov rsi, rdx +;; 31: 4c01da add rdx, r11 +;; 34: 39d9 cmp ecx, ebx +;; 36: 480f43d6 cmovae rdx, rsi +;; 3a: 488b02 mov rax, qword ptr [rdx] +;; 3d: 4885c0 test rax, rax +;; 40: 0f8523000000 jne 0x69 +;; 46: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 4a: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 4e: 4156 push r14 +;; 50: 51 push rcx +;; 51: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 56: be00000000 mov esi, 0 +;; 5b: 8b1424 mov edx, dword ptr [rsp] +;; 5e: ffd3 call rbx +;; 60: 4883c410 add rsp, 0x10 ;; 64: e904000000 jmp 0x6d ;; 69: 4883e0fe and rax, 0xfffffffffffffffe ;; 6d: 4883c410 add rsp, 0x10 diff --git a/winch/filetests/filetests/x64/table/grow.wat b/winch/filetests/filetests/x64/table/grow.wat index b5f97c33fc94..918d16747603 100644 --- a/winch/filetests/filetests/x64/table/grow.wat +++ b/winch/filetests/filetests/x64/table/grow.wat @@ -14,17 +14,16 @@ ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 48897c2408 mov qword ptr [rsp + 8], rdi ;; d: 4c893424 mov qword ptr [rsp], r14 -;; 11: 4c8b5c2408 mov r11, qword ptr [rsp + 8] -;; 16: 4153 push r11 -;; 18: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 1c: 498b5b50 mov rbx, qword ptr [r11 + 0x50] -;; 20: 4883ec08 sub rsp, 8 -;; 24: 4c89f7 mov rdi, r14 -;; 27: be00000000 mov esi, 0 -;; 2c: ba0a000000 mov edx, 0xa -;; 31: 488b4c2408 mov rcx, qword ptr [rsp + 8] -;; 36: ffd3 call rbx -;; 38: 4883c410 add rsp, 0x10 -;; 3c: 4883c410 add rsp, 0x10 -;; 40: 5d pop rbp -;; 41: c3 ret +;; 11: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 15: 498b5b50 mov rbx, qword ptr [r11 + 0x50] +;; 19: 4156 push r14 +;; 1b: 4883ec08 sub rsp, 8 +;; 1f: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 24: be00000000 mov esi, 0 +;; 29: ba0a000000 mov edx, 0xa +;; 2e: 488b4c2418 mov rcx, qword ptr [rsp + 0x18] +;; 33: ffd3 call rbx +;; 35: 4883c410 add rsp, 0x10 +;; 39: 4883c410 add rsp, 0x10 +;; 3d: 5d pop rbp +;; 3e: c3 ret diff --git 
a/winch/filetests/filetests/x64/table/init_copy_drop.wat b/winch/filetests/filetests/x64/table/init_copy_drop.wat index 523f2a9838d9..a5d12df2efc9 100644 --- a/winch/filetests/filetests/x64/table/init_copy_drop.wat +++ b/winch/filetests/filetests/x64/table/init_copy_drop.wat @@ -84,142 +84,144 @@ ;; 8: 4c893424 mov qword ptr [rsp], r14 ;; c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] ;; 10: 498b4310 mov rax, qword ptr [r11 + 0x10] -;; 14: 4883ec08 sub rsp, 8 -;; 18: 4c89f7 mov rdi, r14 -;; 1b: be00000000 mov esi, 0 -;; 20: ba01000000 mov edx, 1 -;; 25: b907000000 mov ecx, 7 -;; 2a: 41b800000000 mov r8d, 0 -;; 30: 41b904000000 mov r9d, 4 -;; 36: ffd0 call rax -;; 38: 4883c408 add rsp, 8 -;; 3c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 40: 498b4318 mov rax, qword ptr [r11 + 0x18] -;; 44: 4883ec08 sub rsp, 8 -;; 48: 4c89f7 mov rdi, r14 -;; 4b: be01000000 mov esi, 1 -;; 50: ffd0 call rax -;; 52: 4883c408 add rsp, 8 -;; 56: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 5a: 498b4310 mov rax, qword ptr [r11 + 0x10] -;; 5e: 4883ec08 sub rsp, 8 -;; 62: 4c89f7 mov rdi, r14 -;; 65: be00000000 mov esi, 0 -;; 6a: ba03000000 mov edx, 3 -;; 6f: b90f000000 mov ecx, 0xf -;; 74: 41b801000000 mov r8d, 1 -;; 7a: 41b903000000 mov r9d, 3 -;; 80: ffd0 call rax -;; 82: 4883c408 add rsp, 8 -;; 86: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 8a: 498b4318 mov rax, qword ptr [r11 + 0x18] -;; 8e: 4883ec08 sub rsp, 8 -;; 92: 4c89f7 mov rdi, r14 -;; 95: be03000000 mov esi, 3 -;; 9a: ffd0 call rax -;; 9c: 4883c408 add rsp, 8 -;; a0: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; a4: 498b4308 mov rax, qword ptr [r11 + 8] -;; a8: 4883ec08 sub rsp, 8 -;; ac: 4c89f7 mov rdi, r14 -;; af: be00000000 mov esi, 0 -;; b4: ba00000000 mov edx, 0 -;; b9: b914000000 mov ecx, 0x14 -;; be: 41b80f000000 mov r8d, 0xf -;; c4: 41b905000000 mov r9d, 5 -;; ca: ffd0 call rax -;; cc: 4883c408 add rsp, 8 -;; d0: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; d4: 498b4308 mov rax, qword ptr [r11 + 8] -;; d8: 4883ec08 sub rsp, 8 -;; dc: 4c89f7 mov rdi, r14 -;; df: be00000000 mov esi, 0 -;; e4: ba00000000 mov edx, 0 -;; e9: b915000000 mov ecx, 0x15 -;; ee: 41b81d000000 mov r8d, 0x1d -;; f4: 41b901000000 mov r9d, 1 -;; fa: ffd0 call rax -;; fc: 4883c408 add rsp, 8 -;; 100: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 104: 498b4308 mov rax, qword ptr [r11 + 8] -;; 108: 4883ec08 sub rsp, 8 -;; 10c: 4c89f7 mov rdi, r14 -;; 10f: be00000000 mov esi, 0 -;; 114: ba00000000 mov edx, 0 -;; 119: b918000000 mov ecx, 0x18 -;; 11e: 41b80a000000 mov r8d, 0xa -;; 124: 41b901000000 mov r9d, 1 -;; 12a: ffd0 call rax -;; 12c: 4883c408 add rsp, 8 -;; 130: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 134: 498b4308 mov rax, qword ptr [r11 + 8] -;; 138: 4883ec08 sub rsp, 8 -;; 13c: 4c89f7 mov rdi, r14 -;; 13f: be00000000 mov esi, 0 -;; 144: ba00000000 mov edx, 0 -;; 149: b90d000000 mov ecx, 0xd -;; 14e: 41b80b000000 mov r8d, 0xb -;; 154: 41b904000000 mov r9d, 4 -;; 15a: ffd0 call rax -;; 15c: 4883c408 add rsp, 8 -;; 160: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 164: 498b4308 mov rax, qword ptr [r11 + 8] -;; 168: 4883ec08 sub rsp, 8 -;; 16c: 4c89f7 mov rdi, r14 -;; 16f: be00000000 mov esi, 0 -;; 174: ba00000000 mov edx, 0 -;; 179: b913000000 mov ecx, 0x13 -;; 17e: 41b814000000 mov r8d, 0x14 -;; 184: 41b905000000 mov r9d, 5 -;; 18a: ffd0 call rax -;; 18c: 4883c408 add rsp, 8 -;; 190: 4883c408 add rsp, 8 -;; 194: 5d pop rbp -;; 195: c3 ret +;; 14: 4156 push r14 +;; 16: 488b3c24 mov rdi, qword ptr [rsp] +;; 1a: be00000000 mov esi, 0 +;; 1f: ba01000000 mov edx, 1 +;; 24: 
b907000000 mov ecx, 7 +;; 29: 41b800000000 mov r8d, 0 +;; 2f: 41b904000000 mov r9d, 4 +;; 35: ffd0 call rax +;; 37: 4883c408 add rsp, 8 +;; 3b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 3f: 498b4318 mov rax, qword ptr [r11 + 0x18] +;; 43: 4156 push r14 +;; 45: 488b3c24 mov rdi, qword ptr [rsp] +;; 49: be01000000 mov esi, 1 +;; 4e: ffd0 call rax +;; 50: 4883c408 add rsp, 8 +;; 54: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 58: 498b4310 mov rax, qword ptr [r11 + 0x10] +;; 5c: 4156 push r14 +;; 5e: 488b3c24 mov rdi, qword ptr [rsp] +;; 62: be00000000 mov esi, 0 +;; 67: ba03000000 mov edx, 3 +;; 6c: b90f000000 mov ecx, 0xf +;; 71: 41b801000000 mov r8d, 1 +;; 77: 41b903000000 mov r9d, 3 +;; 7d: ffd0 call rax +;; 7f: 4883c408 add rsp, 8 +;; 83: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 87: 498b4318 mov rax, qword ptr [r11 + 0x18] +;; 8b: 4156 push r14 +;; 8d: 488b3c24 mov rdi, qword ptr [rsp] +;; 91: be03000000 mov esi, 3 +;; 96: ffd0 call rax +;; 98: 4883c408 add rsp, 8 +;; 9c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; a0: 498b4308 mov rax, qword ptr [r11 + 8] +;; a4: 4156 push r14 +;; a6: 488b3c24 mov rdi, qword ptr [rsp] +;; aa: be00000000 mov esi, 0 +;; af: ba00000000 mov edx, 0 +;; b4: b914000000 mov ecx, 0x14 +;; b9: 41b80f000000 mov r8d, 0xf +;; bf: 41b905000000 mov r9d, 5 +;; c5: ffd0 call rax +;; c7: 4883c408 add rsp, 8 +;; cb: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; cf: 498b4308 mov rax, qword ptr [r11 + 8] +;; d3: 4156 push r14 +;; d5: 488b3c24 mov rdi, qword ptr [rsp] +;; d9: be00000000 mov esi, 0 +;; de: ba00000000 mov edx, 0 +;; e3: b915000000 mov ecx, 0x15 +;; e8: 41b81d000000 mov r8d, 0x1d +;; ee: 41b901000000 mov r9d, 1 +;; f4: ffd0 call rax +;; f6: 4883c408 add rsp, 8 +;; fa: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; fe: 498b4308 mov rax, qword ptr [r11 + 8] +;; 102: 4156 push r14 +;; 104: 488b3c24 mov rdi, qword ptr [rsp] +;; 108: be00000000 mov esi, 0 +;; 10d: ba00000000 mov edx, 0 +;; 112: b918000000 mov ecx, 0x18 +;; 117: 41b80a000000 mov r8d, 0xa +;; 11d: 41b901000000 mov r9d, 1 +;; 123: ffd0 call rax +;; 125: 4883c408 add rsp, 8 +;; 129: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 12d: 498b4308 mov rax, qword ptr [r11 + 8] +;; 131: 4156 push r14 +;; 133: 488b3c24 mov rdi, qword ptr [rsp] +;; 137: be00000000 mov esi, 0 +;; 13c: ba00000000 mov edx, 0 +;; 141: b90d000000 mov ecx, 0xd +;; 146: 41b80b000000 mov r8d, 0xb +;; 14c: 41b904000000 mov r9d, 4 +;; 152: ffd0 call rax +;; 154: 4883c408 add rsp, 8 +;; 158: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 15c: 498b4308 mov rax, qword ptr [r11 + 8] +;; 160: 4156 push r14 +;; 162: 488b3c24 mov rdi, qword ptr [rsp] +;; 166: be00000000 mov esi, 0 +;; 16b: ba00000000 mov edx, 0 +;; 170: b913000000 mov ecx, 0x13 +;; 175: 41b814000000 mov r8d, 0x14 +;; 17b: 41b905000000 mov r9d, 5 +;; 181: ffd0 call rax +;; 183: 4883c408 add rsp, 8 +;; 187: 4883c408 add rsp, 8 +;; 18b: 5d pop rbp +;; 18c: c3 ret ;; ;; 0: 55 push rbp ;; 1: 4889e5 mov rbp, rsp ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi ;; c: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 11: 448b5c240c mov r11d, dword ptr [rsp + 0xc] -;; 16: 4153 push r11 -;; 18: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 1c: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 20: 5b pop rbx -;; 21: 4d89f1 mov r9, r14 -;; 24: 458b91f0000000 mov r10d, dword ptr [r9 + 0xf0] -;; 2b: 4439d3 cmp ebx, r10d -;; 2e: 0f8366000000 jae 0x9a -;; 34: 4189db mov r11d, ebx -;; 37: 4d6bdb08 imul r11, r11, 8 -;; 3b: 4d8b89e8000000 mov r9, qword ptr [r9 + 0xe8] -;; 42: 4d89cc 
mov r12, r9 -;; 45: 4d01d9 add r9, r11 -;; 48: 4439d3 cmp ebx, r10d -;; 4b: 4d0f43cc cmovae r9, r12 -;; 4f: 4d8b01 mov r8, qword ptr [r9] -;; 52: 4c89c0 mov rax, r8 -;; 55: 4d85c0 test r8, r8 -;; 58: 0f8511000000 jne 0x6f -;; 5e: 4c89f7 mov rdi, r14 -;; 61: be00000000 mov esi, 0 -;; 66: 89da mov edx, ebx -;; 68: ffd1 call rcx +;; 11: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 15: 4c89f2 mov rdx, r14 +;; 18: 8b9af0000000 mov ebx, dword ptr [rdx + 0xf0] +;; 1e: 39d9 cmp ecx, ebx +;; 20: 0f8376000000 jae 0x9c +;; 26: 4189cb mov r11d, ecx +;; 29: 4d6bdb08 imul r11, r11, 8 +;; 2d: 488b92e8000000 mov rdx, qword ptr [rdx + 0xe8] +;; 34: 4889d6 mov rsi, rdx +;; 37: 4c01da add rdx, r11 +;; 3a: 39d9 cmp ecx, ebx +;; 3c: 480f43d6 cmovae rdx, rsi +;; 40: 488b02 mov rax, qword ptr [rdx] +;; 43: 4885c0 test rax, rax +;; 46: 0f8523000000 jne 0x6f +;; 4c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 50: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 54: 4156 push r14 +;; 56: 51 push rcx +;; 57: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 5c: be00000000 mov esi, 0 +;; 61: 8b1424 mov edx, dword ptr [rsp] +;; 64: ffd3 call rbx +;; 66: 4883c410 add rsp, 0x10 ;; 6a: e904000000 jmp 0x73 ;; 6f: 4883e0fe and rax, 0xfffffffffffffffe ;; 73: 4885c0 test rax, rax -;; 76: 0f8420000000 je 0x9c +;; 76: 0f8422000000 je 0x9e ;; 7c: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] ;; 80: 418b0b mov ecx, dword ptr [r11] ;; 83: 8b5018 mov edx, dword ptr [rax + 0x18] ;; 86: 39d1 cmp ecx, edx -;; 88: 0f8510000000 jne 0x9e -;; 8e: 488b4810 mov rcx, qword ptr [rax + 0x10] -;; 92: ffd1 call rcx -;; 94: 4883c410 add rsp, 0x10 -;; 98: 5d pop rbp -;; 99: c3 ret -;; 9a: 0f0b ud2 +;; 88: 0f8512000000 jne 0xa0 +;; 8e: 50 push rax +;; 8f: 59 pop rcx +;; 90: 488b5110 mov rdx, qword ptr [rcx + 0x10] +;; 94: ffd2 call rdx +;; 96: 4883c410 add rsp, 0x10 +;; 9a: 5d pop rbp +;; 9b: c3 ret ;; 9c: 0f0b ud2 ;; 9e: 0f0b ud2 +;; a0: 0f0b ud2 diff --git a/winch/filetests/filetests/x64/table/set.wat b/winch/filetests/filetests/x64/table/set.wat index 55c325fcb011..b3cfb2b0f56e 100644 --- a/winch/filetests/filetests/x64/table/set.wat +++ b/winch/filetests/filetests/x64/table/set.wat @@ -54,52 +54,48 @@ ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi ;; c: 89742408 mov dword ptr [rsp + 8], esi ;; 10: 4c893424 mov qword ptr [rsp], r14 -;; 14: 448b5c240c mov r11d, dword ptr [rsp + 0xc] -;; 19: 4153 push r11 -;; 1b: 448b5c2410 mov r11d, dword ptr [rsp + 0x10] -;; 20: 4153 push r11 -;; 22: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 26: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 2a: 5b pop rbx -;; 2b: 4d89f1 mov r9, r14 -;; 2e: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 32: 4439d3 cmp ebx, r10d -;; 35: 0f8377000000 jae 0xb2 -;; 3b: 4189db mov r11d, ebx -;; 3e: 4d6bdb08 imul r11, r11, 8 -;; 42: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 46: 4d89cc mov r12, r9 -;; 49: 4d01d9 add r9, r11 -;; 4c: 4439d3 cmp ebx, r10d -;; 4f: 4d0f43cc cmovae r9, r12 -;; 53: 4d8b01 mov r8, qword ptr [r9] -;; 56: 4c89c0 mov rax, r8 -;; 59: 4d85c0 test r8, r8 -;; 5c: 0f8519000000 jne 0x7b -;; 62: 4883ec08 sub rsp, 8 -;; 66: 4c89f7 mov rdi, r14 -;; 69: be00000000 mov esi, 0 -;; 6e: 89da mov edx, ebx -;; 70: ffd1 call rcx -;; 72: 4883c408 add rsp, 8 -;; 76: e904000000 jmp 0x7f -;; 7b: 4883e0fe and rax, 0xfffffffffffffffe -;; 7f: 59 pop rcx -;; 80: 4c89f2 mov rdx, r14 -;; 83: 8b5a50 mov ebx, dword ptr [rdx + 0x50] -;; 86: 39d9 cmp ecx, ebx -;; 88: 0f8326000000 jae 0xb4 -;; 8e: 4189cb mov r11d, ecx -;; 91: 4d6bdb08 imul r11, r11, 8 -;; 95: 488b5248 mov rdx, qword ptr [rdx + 0x48] -;; 99: 
4889d6 mov rsi, rdx -;; 9c: 4c01da add rdx, r11 -;; 9f: 39d9 cmp ecx, ebx -;; a1: 480f43d6 cmovae rdx, rsi -;; a5: 4883c801 or rax, 1 -;; a9: 488902 mov qword ptr [rdx], rax -;; ac: 4883c410 add rsp, 0x10 -;; b0: 5d pop rbp -;; b1: c3 ret -;; b2: 0f0b ud2 -;; b4: 0f0b ud2 +;; 14: 8b4c2408 mov ecx, dword ptr [rsp + 8] +;; 18: 4c89f2 mov rdx, r14 +;; 1b: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 1e: 39d9 cmp ecx, ebx +;; 20: 0f8380000000 jae 0xa6 +;; 26: 4189cb mov r11d, ecx +;; 29: 4d6bdb08 imul r11, r11, 8 +;; 2d: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 31: 4889d6 mov rsi, rdx +;; 34: 4c01da add rdx, r11 +;; 37: 39d9 cmp ecx, ebx +;; 39: 480f43d6 cmovae rdx, rsi +;; 3d: 488b02 mov rax, qword ptr [rdx] +;; 40: 4885c0 test rax, rax +;; 43: 0f8523000000 jne 0x6c +;; 49: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 4d: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 51: 4156 push r14 +;; 53: 51 push rcx +;; 54: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 59: be00000000 mov esi, 0 +;; 5e: 8b1424 mov edx, dword ptr [rsp] +;; 61: ffd3 call rbx +;; 63: 4883c410 add rsp, 0x10 +;; 67: e904000000 jmp 0x70 +;; 6c: 4883e0fe and rax, 0xfffffffffffffffe +;; 70: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 74: 4c89f2 mov rdx, r14 +;; 77: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 7a: 39d9 cmp ecx, ebx +;; 7c: 0f8326000000 jae 0xa8 +;; 82: 4189cb mov r11d, ecx +;; 85: 4d6bdb08 imul r11, r11, 8 +;; 89: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 8d: 4889d6 mov rsi, rdx +;; 90: 4c01da add rdx, r11 +;; 93: 39d9 cmp ecx, ebx +;; 95: 480f43d6 cmovae rdx, rsi +;; 99: 4883c801 or rax, 1 +;; 9d: 488902 mov qword ptr [rdx], rax +;; a0: 4883c410 add rsp, 0x10 +;; a4: 5d pop rbp +;; a5: c3 ret +;; a6: 0f0b ud2 +;; a8: 0f0b ud2 diff --git a/winch/filetests/src/lib.rs b/winch/filetests/src/lib.rs index 6b435b701a8f..aabcab240837 100644 --- a/winch/filetests/src/lib.rs +++ b/winch/filetests/src/lib.rs @@ -13,8 +13,9 @@ mod test { use wasmtime_environ::{ wasmparser::{Parser as WasmParser, Validator}, DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTypes, Tunables, TypeConvert, + VMOffsets, }; - use winch_codegen::{lookup, TargetIsa}; + use winch_codegen::{lookup, BuiltinFunctions, TargetIsa}; use winch_test_macros::generate_file_tests; #[derive(Clone, Debug, Serialize, Deserialize)] @@ -158,10 +159,19 @@ mod test { let sig = types[types.function_at(index.as_u32())].unwrap_func(); let sig = translation.module.convert_func_type(&sig); + let vmoffsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let mut builtins = BuiltinFunctions::new(&vmoffsets, isa.wasmtime_call_conv()); let FunctionBodyData { body, validator } = f.1; let mut validator = validator.into_validator(Default::default()); let buffer = isa - .compile_function(&sig, module_types, &body, &translation, &mut validator) + .compile_function( + &sig, + &body, + translation, + module_types, + &mut builtins, + &mut validator, + ) .expect("Couldn't compile function"); disasm(buffer.data(), isa).unwrap() diff --git a/winch/src/compile.rs b/winch/src/compile.rs index 2d8b929022d6..ebf6061080af 100644 --- a/winch/src/compile.rs +++ b/winch/src/compile.rs @@ -6,9 +6,9 @@ use target_lexicon::Triple; use wasmtime_environ::{ wasmparser::{Parser as WasmParser, Validator}, DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTranslation, ModuleTypes, - Tunables, TypeConvert, + Tunables, TypeConvert, VMOffsets, }; -use winch_codegen::{lookup, TargetIsa}; +use winch_codegen::{lookup, BuiltinFunctions, TargetIsa}; use 
winch_filetests::disasm::disasm; #[derive(Parser, Debug)] @@ -57,9 +57,18 @@ fn compile( let sig = types[types.function_at(index.as_u32())].unwrap_func(); let sig = translation.module.convert_func_type(sig); let FunctionBodyData { body, validator } = f.1; + let vmoffsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let mut builtins = BuiltinFunctions::new(&vmoffsets, isa.wasmtime_call_conv()); let mut validator = validator.into_validator(Default::default()); let buffer = isa - .compile_function(&sig, module_types, &body, &translation, &mut validator) + .compile_function( + &sig, + &body, + translation, + module_types, + &mut builtins, + &mut validator, + ) .expect("Couldn't compile function"); println!("Disassembly for function: {}", index.as_u32());
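The x64 `float_round` change above falls back to a builtin call when SSE4.1 `roundss`/`roundsd` are unavailable, picking the builtin from the rounding mode and operand size. Below is a minimal, standalone Rust sketch of that selection logic only; the enums and the `rounding_libcall` function are hypothetical stand-ins, not Winch's actual types or API.

// Hypothetical stand-ins that mirror the (mode, size) -> libcall mapping used
// by the non-SSE4.1 fallback path in the diff above.
#[derive(Debug, Clone, Copy, PartialEq)]
enum RoundingMode { Up, Down, Nearest, Zero }

#[derive(Debug, Clone, Copy, PartialEq)]
enum OperandSize { S32, S64 }

#[derive(Debug, Clone, Copy, PartialEq)]
enum LibCall { CeilF32, CeilF64, FloorF32, FloorF64, NearestF32, NearestF64, TruncF32, TruncF64 }

// Maps a rounding mode and operand size to the libcall targeted by the
// fallback path when the target lacks SSE4.1 rounding instructions.
fn rounding_libcall(mode: RoundingMode, size: OperandSize) -> LibCall {
    use {LibCall::*, OperandSize::*, RoundingMode::*};
    match (mode, size) {
        (Up, S32) => CeilF32,
        (Up, S64) => CeilF64,
        (Down, S32) => FloorF32,
        (Down, S64) => FloorF64,
        (Nearest, S32) => NearestF32,
        (Nearest, S64) => NearestF64,
        (Zero, S32) => TruncF32,
        (Zero, S64) => TruncF64,
    }
}

fn main() {
    // f64.floor without SSE4.1 routes to the floor_f64 builtin.
    assert_eq!(rounding_libcall(RoundingMode::Down, OperandSize::S64), LibCall::FloorF64);
    println!("{:?}", rounding_libcall(RoundingMode::Down, OperandSize::S64));
}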
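The table.grow lowering in the visitor changes also rearranges the value stack before emitting the builtin call: the top two operands are swapped so the init value becomes the last argument, and the vmctx plus table index are inserted just below them. A standalone sketch of that shuffle, assuming a plain Vec<String> as a stand-in for the value stack (the names here are illustrative, not the real CodeGenContext API):

// A plain Vec<String> stands in for the value stack; names are illustrative.
fn shuffle_table_grow_args(stack: &mut Vec<String>, vmctx: &str, table_index: u32) {
    let len = stack.len();
    // table.grow leaves [ init_value | delta ] on the stack (delta on top),
    // but the builtin expects (vmctx, table_index, delta, init_value).
    assert!(len >= 2, "table.grow needs two operands on the value stack");
    stack.swap(len - 1, len - 2);
    // Insert the leading arguments just below the two Wasm operands.
    stack.insert(len - 2, format!("table:{table_index}"));
    stack.insert(len - 2, vmctx.to_string());
}

fn main() {
    let mut stack = vec!["init_value".to_string(), "delta".to_string()];
    shuffle_table_grow_args(&mut stack, "vmctx", 0);
    assert_eq!(stack, ["vmctx", "table:0", "delta", "init_value"]);
    println!("{stack:?}");
}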