Skip to content

Commit

Permalink
Implement lazy VMCallerCheckedAnyfunc initialization.
Browse files Browse the repository at this point in the history
Currently, in the instance initialization path, we build an "anyfunc"
for every imported function and every function in the module. These
anyfuncs are used as function references in tables, both within a module
and across modules (via function imports).

Building all of these is quite wasteful if we never use most of them.
Instead, it would be better to build only the ones that we use.

This commit implements a technique to lazily initialize them: at the
point that a pointer to an anyfunc is taken, we check whether it has
actually been initialized. If it hasn't, we do so. We track whether an
anyfunc has been initialized with a separate bitmap, and we only need to
zero this bitmap at instantiation time. (This is important for
performance too: even just zeroing a large array of anyfunc structs can
be expensive, at the microsecond scale, for a module with thousands of
functions.)

Keeping the information around that is needed for this lazy init
required a bit of refactoring in the InstanceAllocationRequest; a few
fields now live in an `Arc` held by the instance while it exists.
  • Loading branch information
cfallin committed Jan 28, 2022
1 parent 90e7cef commit 40ce633
Show file tree
Hide file tree
Showing 20 changed files with 278 additions and 107 deletions.
14 changes: 10 additions & 4 deletions crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1248,10 +1248,16 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
mut pos: cranelift_codegen::cursor::FuncCursor<'_>,
func_index: FuncIndex,
) -> WasmResult<ir::Value> {
let vmctx = self.vmctx(&mut pos.func);
let vmctx = pos.ins().global_value(self.pointer_type(), vmctx);
let offset = self.offsets.vmctx_anyfunc(func_index);
Ok(pos.ins().iadd_imm(vmctx, i64::from(offset)))
let func_index = pos.ins().iconst(I32, func_index.as_u32() as i64);
let builtin_index = BuiltinFunctionIndex::ref_func();
let builtin_sig = self.builtin_function_signatures.ref_func(&mut pos.func);
let (vmctx, builtin_addr) =
self.translate_load_builtin_function_address(&mut pos, builtin_index);

let call_inst = pos
.ins()
.call_indirect(builtin_sig, builtin_addr, &[vmctx, func_index]);
Ok(pos.func.dfg.first_result(call_inst))
}

fn translate_custom_global_get(
Expand Down
2 changes: 2 additions & 0 deletions crates/environ/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ macro_rules! foreach_builtin_function {
memory_fill(vmctx, i32, i64, i32, i64) -> ();
/// Returns an index for wasm's `memory.init` instruction.
memory_init(vmctx, i32, i32, i64, i32, i32) -> ();
/// Returns a value for wasm's `ref.func` instruction.
ref_func(vmctx, i32) -> (pointer);
/// Returns an index for wasm's `data.drop` instruction.
data_drop(vmctx, i32) -> ();
/// Returns an index for Wasm's `table.grow` instruction for `funcref`s.
Expand Down
23 changes: 22 additions & 1 deletion crates/environ/src/vmoffsets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ pub struct VMOffsets<P> {
defined_memories: u32,
defined_globals: u32,
defined_anyfuncs: u32,
anyfuncs_init: u32,
anyfuncs_init_u64_words: u32,
builtin_functions: u32,
size: u32,
}
Expand Down Expand Up @@ -187,6 +189,8 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
defined_memories: 0,
defined_globals: 0,
defined_anyfuncs: 0,
anyfuncs_init: 0,
anyfuncs_init_u64_words: 0,
builtin_functions: 0,
size: 0,
};
Expand Down Expand Up @@ -275,7 +279,7 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap(),
)
.unwrap();
ret.builtin_functions = ret
ret.anyfuncs_init = ret
.defined_anyfuncs
.checked_add(
ret.num_imported_functions
Expand All @@ -285,6 +289,11 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap(),
)
.unwrap();
ret.anyfuncs_init_u64_words = (ret.num_defined_functions + 63) / 64;
ret.builtin_functions = ret
.anyfuncs_init
.checked_add(ret.anyfuncs_init_u64_words.checked_mul(8).unwrap())
.unwrap();
ret.size = ret
.builtin_functions
.checked_add(
Expand Down Expand Up @@ -589,6 +598,18 @@ impl<P: PtrSize> VMOffsets<P> {
self.defined_globals
}

/// The offset of the `anyfuncs_init` bitset.
///
/// This bitset tracks which `VMCallerCheckedAnyfunc` slots in the
/// vmctx have been lazily initialized; it is zeroed at
/// instantiation and bits are set as anyfunc pointers are taken.
#[inline]
pub fn vmctx_anyfuncs_init_begin(&self) -> u32 {
    self.anyfuncs_init
}

/// The length of the `anyfuncs_init` bitset in bytes.
///
/// The bitset is stored as an array of `u64` words, so the byte
/// length is simply the word count scaled by the word size.
#[inline]
pub fn vmctx_anyfuncs_init_len(&self) -> u32 {
    // 8 bytes per u64 word of the bitset.
    let bytes_per_word: u32 = 8;
    bytes_per_word * self.anyfuncs_init_u64_words
}

/// The offset of the `anyfuncs` array.
#[inline]
pub fn vmctx_anyfuncs_begin(&self) -> u32 {
Expand Down
6 changes: 3 additions & 3 deletions crates/jit/src/instantiate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ pub struct CompiledModule {
address_map_data: Range<usize>,
trap_data: Range<usize>,
module: Arc<Module>,
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
funcs: Arc<PrimaryMap<DefinedFuncIndex, FunctionInfo>>,
trampolines: Vec<Trampoline>,
meta: Metadata,
code: Range<usize>,
Expand Down Expand Up @@ -298,7 +298,7 @@ impl CompiledModule {

let mut ret = Self {
module: Arc::new(info.module),
funcs: info.funcs,
funcs: Arc::new(info.funcs),
trampolines: info.trampolines,
wasm_data: subslice_range(section(ELF_WASM_DATA)?, code.mmap),
address_map_data: code
Expand Down Expand Up @@ -374,7 +374,7 @@ impl CompiledModule {
}

/// Returns the `FunctionInfo` map for all defined functions.
pub fn functions(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionInfo> {
pub fn functions(&self) -> &Arc<PrimaryMap<DefinedFuncIndex, FunctionInfo>> {
&self.funcs
}

Expand Down
96 changes: 94 additions & 2 deletions crates/runtime/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::convert::TryFrom;
use std::hash::Hash;
use std::ops::Range;
use std::ptr::NonNull;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::{mem, ptr, slice};
use wasmtime_environ::{
Expand Down Expand Up @@ -54,6 +54,9 @@ pub(crate) struct Instance {
/// The `Module` this `Instance` was instantiated from.
module: Arc<Module>,

/// The instantiation info needed for deferred initialization.
info: Arc<InstanceAllocationInfo>,

/// Offsets in the `vmctx` region, precomputed from the `module` above.
offsets: VMOffsets<HostPtr>,

Expand Down Expand Up @@ -433,6 +436,46 @@ impl Instance {
Layout::from_size_align(size, align).unwrap()
}

/// Construct a new VMCallerCheckedAnyfunc for the given function
/// (imported or defined in this module). Used during lazy
/// initialization the first time the VMCallerCheckedAnyfunc in
/// the VMContext is referenced; written into place in a careful
/// atomic way by `get_caller_checked_anyfunc()` below.
fn construct_anyfunc(&self, index: FuncIndex) -> VMCallerCheckedAnyfunc {
    // Resolve the function's signature to its shared (cross-module)
    // signature index.
    let sig = self.module.functions[index];
    let type_index = self.info.shared_signatures.lookup(sig);

    // A function defined in this module points back at our own vmctx
    // and at its compiled body inside the module image; an imported
    // function reuses the body pointer and vmctx of its origin.
    let (func_ptr, vmctx) = match self.module.defined_func_index(index) {
        Some(def_index) => {
            let body = self.info.image_base + self.info.functions[def_index].start as usize;
            (body as *mut _, self.vmctx_ptr())
        }
        None => {
            let import = self.imported_function(index);
            (import.body.as_ptr(), import.vmctx)
        }
    };

    VMCallerCheckedAnyfunc {
        func_ptr,
        type_index,
        vmctx,
    }
}

/// Get a word of the bitmap of anyfunc-initialized bits, and the
/// bitmask for the particular bit.
///
/// The bitmap is an array of `u64` words stored in the vmctx at
/// `vmctx_anyfuncs_init_begin()`; bit `i` of the array corresponds
/// to the anyfunc for `FuncIndex` `i`.
pub(crate) fn get_anyfunc_bitmap_word(&self, index: FuncIndex) -> (*const AtomicU64, u64) {
    let word_index = index.as_u32() / 64;
    let bit_index = index.as_u32() % 64;
    // NOTE(review): the bitmap is sized from `num_defined_functions`
    // (see the `anyfuncs_init_u64_words` computation in vmoffsets.rs),
    // but `index` here spans imported *and* defined functions — the
    // same index space as the anyfuncs array itself. For a module with
    // imported functions the high indices can overrun the bitset;
    // confirm the sizing, and catch any overrun in debug builds here.
    debug_assert!(
        word_index * 8 < self.offsets.vmctx_anyfuncs_init_len(),
        "anyfunc init-bitmap word index out of range"
    );
    let word = unsafe {
        self.vmctx_plus_offset::<AtomicU64>(
            self.offsets.vmctx_anyfuncs_init_begin() + (word_index * 8),
        )
    };
    let mask = 1u64 << (bit_index as u64);
    (word, mask)
}

/// Get a `&VMCallerCheckedAnyfunc` for the given `FuncIndex`.
///
/// Returns `None` if the index is the reserved index value.
Expand All @@ -447,7 +490,55 @@ impl Instance {
return None;
}

unsafe { Some(&*self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index))) }
unsafe {
let anyfunc =
self.vmctx_plus_offset::<VMCallerCheckedAnyfunc>(self.offsets.vmctx_anyfunc(index));

// Check the bitmap to see if we need to initialize.
let (bitmap_word, bitmask) = self.get_anyfunc_bitmap_word(index);
let bitmap_value = (*bitmap_word).load(Ordering::Acquire);
if bitmap_value & bitmask == 0 {
// Write the constructed anyfunc. This may race
// with others, but it is a benign race because
// the written values are the same from every
// thread; in other words, the writes are
// idempotent.
//
// Note that we're racing without using
// atomics. Yikes! Undefined behavior! We're actually
// constrained by the need to return a normal &anyfunc
// with normal (non-atomic) fields; there's no way
// around having the non-atomic value stored somewhere
// persistently. What are the potential consequences
// of this?
//
// In practice, it means that (i) we get no fences
// whatsoever for `anyfunc`, and (ii) the compiler
// assumes it can cache loaded values in memory
// without reloading them (ie the pointers are not
// "volatile"). This is actually all we need -- normal
// dataflow dependencies mean that within the thread
// that does the init, the normal stores are seen by
// normal loads to the returned pointer. And the
// fences implicit in the bitmap load and store are
// enough to make anyone else who observes a "1" bit
// on another thread also see our normal stores. And
// caching values is fine, because once they're
// written, they are always the same, even if someone
// else comes in and does another initialization.
*anyfunc = self.construct_anyfunc(index);
// Set the bit in the bitmap. Note that this may
// *unset* some other bits, if we're racing with
// someone else who's setting another bit in the same
// word. That's fine; they'll just do another
// (idempotent, writing the same bytes over and over
// again) init of another anyfunc!
(*bitmap_word).store(bitmap_value | bitmask, Ordering::Release);
}

let anyfunc = anyfunc.as_ref().unwrap();
Some(anyfunc)
}
}

unsafe fn anyfunc_base(&self) -> *mut VMCallerCheckedAnyfunc {
Expand Down Expand Up @@ -513,6 +604,7 @@ impl Instance {
ptr::null_mut()
} else {
debug_assert!(idx.as_u32() < self.offsets.num_defined_functions);
self.get_caller_checked_anyfunc(*idx); // Force lazy init
base.add(usize::try_from(idx.as_u32()).unwrap())
}
}),
Expand Down
Loading

0 comments on commit 40ce633

Please sign in to comment.