Skip to content

Commit

Permalink
Implement lazy VMCallerCheckedAnyfunc initialization.
Browse files Browse the repository at this point in the history
Currently, in the instance initialization path, we build an "anyfunc"
for every imported function and every function in the module. These
anyfuncs are used as function references in tables, both within a module
and across modules (via function imports).

Building all of these is quite wasteful if we never use most of them.
Instead, it would be better to build only the ones that we use.

This commit implements a technique to lazily initialize them: at the
point that a pointer to an anyfunc is taken, we check whether it has
actually been initialized. If it hasn't, we do so. We track whether an
anyfunc has been initialized with a separate bitmap, and we only need to
zero this bitmap at instantiation time. (This is important for
performance too: even just zeroing a large array of anyfunc structs can
be expensive, at the microsecond scale, for a module with thousands of
functions.)

Keeping the information around that is needed for this lazy init
required a bit of refactoring in the InstanceAllocationRequest; a few
fields now live in an `Arc` held by the instance while it exists.
  • Loading branch information
cfallin committed Jan 27, 2022
1 parent 90e7cef commit f45f689
Show file tree
Hide file tree
Showing 19 changed files with 278 additions and 101 deletions.
14 changes: 10 additions & 4 deletions crates/cranelift/src/func_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1248,10 +1248,16 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
mut pos: cranelift_codegen::cursor::FuncCursor<'_>,
func_index: FuncIndex,
) -> WasmResult<ir::Value> {
let vmctx = self.vmctx(&mut pos.func);
let vmctx = pos.ins().global_value(self.pointer_type(), vmctx);
let offset = self.offsets.vmctx_anyfunc(func_index);
Ok(pos.ins().iadd_imm(vmctx, i64::from(offset)))
let func_index = pos.ins().iconst(I32, func_index.as_u32() as i64);
let builtin_index = BuiltinFunctionIndex::ref_func();
let builtin_sig = self.builtin_function_signatures.ref_func(&mut pos.func);
let (vmctx, builtin_addr) =
self.translate_load_builtin_function_address(&mut pos, builtin_index);

let call_inst = pos
.ins()
.call_indirect(builtin_sig, builtin_addr, &[vmctx, func_index]);
Ok(pos.func.dfg.first_result(call_inst))
}

fn translate_custom_global_get(
Expand Down
2 changes: 2 additions & 0 deletions crates/environ/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ macro_rules! foreach_builtin_function {
memory_fill(vmctx, i32, i64, i32, i64) -> ();
/// Returns an index for wasm's `memory.init` instruction.
memory_init(vmctx, i32, i32, i64, i32, i32) -> ();
/// Returns a value for wasm's `ref.func` instruction.
ref_func(vmctx, i32) -> (pointer);
/// Returns an index for wasm's `data.drop` instruction.
data_drop(vmctx, i32) -> ();
/// Returns an index for Wasm's `table.grow` instruction for `funcref`s.
Expand Down
23 changes: 22 additions & 1 deletion crates/environ/src/vmoffsets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ pub struct VMOffsets<P> {
defined_memories: u32,
defined_globals: u32,
defined_anyfuncs: u32,
anyfuncs_init: u32,
anyfuncs_init_u64_words: u32,
builtin_functions: u32,
size: u32,
}
Expand Down Expand Up @@ -187,6 +189,8 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
defined_memories: 0,
defined_globals: 0,
defined_anyfuncs: 0,
anyfuncs_init: 0,
anyfuncs_init_u64_words: 0,
builtin_functions: 0,
size: 0,
};
Expand Down Expand Up @@ -275,7 +279,7 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap(),
)
.unwrap();
ret.builtin_functions = ret
ret.anyfuncs_init = ret
.defined_anyfuncs
.checked_add(
ret.num_imported_functions
Expand All @@ -285,6 +289,11 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap(),
)
.unwrap();
ret.anyfuncs_init_u64_words = (ret.num_defined_functions + 63) / 64;
ret.builtin_functions = ret
.anyfuncs_init
.checked_add(ret.anyfuncs_init_u64_words.checked_mul(8).unwrap())
.unwrap();
ret.size = ret
.builtin_functions
.checked_add(
Expand Down Expand Up @@ -589,6 +598,18 @@ impl<P: PtrSize> VMOffsets<P> {
self.defined_globals
}

/// The offset of the `anyfuncs_init` bitset.
#[inline]
pub fn vmctx_anyfuncs_init_begin(&self) -> u32 {
self.anyfuncs_init
}

/// The length of the `anyfuncs_init` bitset in bytes.
#[inline]
pub fn vmctx_anyfuncs_init_len(&self) -> u32 {
self.anyfuncs_init_u64_words * 8
}

/// The offset of the `anyfuncs` array.
#[inline]
pub fn vmctx_anyfuncs_begin(&self) -> u32 {
Expand Down
6 changes: 3 additions & 3 deletions crates/jit/src/instantiate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ pub struct CompiledModule {
address_map_data: Range<usize>,
trap_data: Range<usize>,
module: Arc<Module>,
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
funcs: Arc<PrimaryMap<DefinedFuncIndex, FunctionInfo>>,
trampolines: Vec<Trampoline>,
meta: Metadata,
code: Range<usize>,
Expand Down Expand Up @@ -298,7 +298,7 @@ impl CompiledModule {

let mut ret = Self {
module: Arc::new(info.module),
funcs: info.funcs,
funcs: Arc::new(info.funcs),
trampolines: info.trampolines,
wasm_data: subslice_range(section(ELF_WASM_DATA)?, code.mmap),
address_map_data: code
Expand Down Expand Up @@ -374,7 +374,7 @@ impl CompiledModule {
}

/// Returns the `FunctionInfo` map for all defined functions.
pub fn functions(&self) -> &PrimaryMap<DefinedFuncIndex, FunctionInfo> {
pub fn functions(&self) -> &Arc<PrimaryMap<DefinedFuncIndex, FunctionInfo>> {
&self.funcs
}

Expand Down
102 changes: 101 additions & 1 deletion crates/runtime/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ pub(crate) struct Instance {
/// The `Module` this `Instance` was instantiated from.
module: Arc<Module>,

/// The instantiation info needed for deferred initialization.
info: Arc<InstanceAllocationInfo>,

/// Offsets in the `vmctx` region, precomputed from the `module` above.
offsets: VMOffsets<HostPtr>,

Expand Down Expand Up @@ -433,6 +436,48 @@ impl Instance {
Layout::from_size_align(size, align).unwrap()
}

/// Construct a new VMCallerCheckedAnyfunc for the given function
/// (imported or defined in this module). Used during lazy
/// initialization the first time the VMCallerCheckedAnyfunc in
/// the VMContext is referenced; written into place in a careful
/// atomic way by `get_caller_checked_anyfunc()` below.
fn construct_anyfunc(&self, index: FuncIndex) -> VMCallerCheckedAnyfunc {
let sig = self.module.functions[index];
let type_index = self.info.shared_signatures.lookup(sig);

let (func_ptr, vmctx) = if let Some(def_index) = self.module.defined_func_index(index) {
(
(self.info.image_base + self.info.functions[def_index].start as usize) as *mut _,
self.vmctx_ptr(),
)
} else {
let import = self.imported_function(index);
(import.body.as_ptr(), import.vmctx)
};

VMCallerCheckedAnyfunc {
func_ptr,
type_index,
vmctx,
}
}

/// Get a word of the bitmap of anyfunc-initialized bits, and the
/// bitmask for the particular bit.
pub(crate) fn get_anyfunc_bitmap_word(&self, index: FuncIndex) -> (&mut u64, u64) {
let word_index = index.as_u32() / 64;
let bit_index = index.as_u32() % 64;
let word = unsafe {
self.vmctx_plus_offset::<u64>(
self.offsets.vmctx_anyfuncs_init_begin() + (word_index * 8),
)
.as_mut()
.unwrap()
};
let mask = 1u64 << (bit_index as u64);
(word, mask)
}

/// Get a `&VMCallerCheckedAnyfunc` for the given `FuncIndex`.
///
/// Returns `None` if the index is the reserved index value.
Expand All @@ -447,7 +492,61 @@ impl Instance {
return None;
}

unsafe { Some(&*self.vmctx_plus_offset(self.offsets.vmctx_anyfunc(index))) }
unsafe {
let anyfunc =
self.vmctx_plus_offset::<VMCallerCheckedAnyfunc>(self.offsets.vmctx_anyfunc(index));
let (bitmap_word, bitmask) = self.get_anyfunc_bitmap_word(index);
if *bitmap_word & bitmask == 0 {
// N.B.: both the anyfunc write and the bitmap-word
// write are non-atomic, yet this is inside a `&self`
// method. How does that work? In fact it is a benign
// race. In the most common case, an anyfunc is
// queried either as part of table init or as part of
// executing a function, both of which have a `&mut
// Store` so are actually exclusive. But if
// e.g. export-lookups happen in parallel, we might
// actually race.
//
// The race is benign because (i) the write is
// idempotent -- the constructed anyfunc will always
// be exactly the same, once it is constructed, so a
// second write, even partially complete, is fine
// (even if fragmented/split; any fragment written
// will just overwrite the same bytes); and (ii)
// racing to set a bit in the bitmap might miss some
// updates, and falsely clear a bit, but that just
// means that we do another init later, which as per
// (i) is safe.
//
// The important safety property is that before we
// hand out a reference to an anyfunc, we have
// initialized it *at least once*, and this property
// is always true because (i) unrelated bits are never
// *set* in the bitmap spontaneously, even by a race;
// (ii) this function always verifies set bit before
// returning; and (iii) once initialized, an anyfunc
// stays initialized (idempotent writes discussed
// above).
//
// Note that we tried it the other way first, with
// atomics rather than benign races. Using an atomic
// in the anyfunc itself to tell whether it's
// initialized is expensive because we have to zero
// all anyfuncs at instantiation, and zeroing words,
// located sparsely in the vmcontext, is expensive
// compared to zeroing a dense bitmap. Using atomic
// updates to the bitmap is expensive because
// `fetch_or` is expensive; the `lock or` instruction
// on x86-64 was the majority of the profile in an
// instantiation microbenchmark.
*anyfunc = self.construct_anyfunc(index);
let (bitmap_word, bitmask) = self.get_anyfunc_bitmap_word(index);
*bitmap_word |= bitmask;
}

let anyfunc = anyfunc.as_ref().unwrap();
Some(anyfunc)
}
}

unsafe fn anyfunc_base(&self) -> *mut VMCallerCheckedAnyfunc {
Expand Down Expand Up @@ -513,6 +612,7 @@ impl Instance {
ptr::null_mut()
} else {
debug_assert!(idx.as_u32() < self.offsets.num_defined_functions);
self.get_caller_checked_anyfunc(*idx); // Force lazy init
base.add(usize::try_from(idx.as_u32()).unwrap())
}
}),
Expand Down
Loading

0 comments on commit f45f689

Please sign in to comment.