Skip to content

Commit

Permalink
Move function names out of Module (#3789)
Browse files Browse the repository at this point in the history
* Move function names out of `Module`

This commit moves function names in a module out of the
`wasmtime_environ::Module` type and into separate sections stored in the
final compiled artifact. Spurred on by #3787 to look at module load
times I noticed that a huge amount of time was spent in deserializing
this map. The `spidermonkey.wasm` file, for example, has a 3MB name
section which is a lot of unnecessary data to deserialize at module load
time.

The names of functions are now split out into their own dedicated
section of the compiled artifact and metadata about them is stored in a
more compact format at runtime by avoiding a `BTreeMap` and instead
using a sorted array. Overall this improves deserialize times by up to
80% for modules with large name sections since the name section is no
longer deserialized at load time and it's lazily paged in as names are
actually referenced.

* Fix a typo

* Fix compiled module determinism

Need to not only sort afterwards but also first to ensure the data of
the name section is consistent.
  • Loading branch information
alexcrichton authored Feb 10, 2022
1 parent 41eb225 commit 520a7f2
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 33 deletions.
6 changes: 3 additions & 3 deletions crates/cranelift/src/debug/transform/simulate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
use wasmparser::Type as WasmType;
use wasmtime_environ::{
DebugInfoData, DefinedFuncIndex, EntityRef, FunctionMetadata, WasmFileInfo,
DebugInfoData, DefinedFuncIndex, EntityRef, FuncIndex, FunctionMetadata, WasmFileInfo,
};

const PRODUCER_NAME: &str = "wasmtime";
Expand Down Expand Up @@ -369,7 +369,7 @@ pub fn generate_simulated_dwarf(

let func_index = imported_func_count + (index as u32);
let id = match func_names
.get(&func_index)
.get(&FuncIndex::from_u32(func_index))
.and_then(|s| check_invalid_chars_in_name(s))
{
Some(n) => out_strings.add(assert_dwarf_str!(n)),
Expand Down Expand Up @@ -400,7 +400,7 @@ pub fn generate_simulated_dwarf(
&[(source_range.0, source_range.1)],
&wasm_types,
&di.wasm_file.funcs[index],
locals_names.get(&(index as u32)),
locals_names.get(&FuncIndex::from_u32(index as u32)),
out_strings,
isa,
)?;
Expand Down
3 changes: 0 additions & 3 deletions crates/environ/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -664,9 +664,6 @@ pub struct Module {
/// The map from passive data index (data segment index space) to index in `passive_data`.
pub passive_data_map: BTreeMap<DataIndex, Range<u32>>,

/// WebAssembly function names.
pub func_names: BTreeMap<FuncIndex, String>,

/// Types declared in the wasm module.
pub types: PrimaryMap<TypeIndex, ModuleType>,

Expand Down
23 changes: 11 additions & 12 deletions crates/environ/src/module_environ.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
#[allow(missing_docs)]
pub struct NameSection<'a> {
pub module_name: Option<&'a str>,
pub func_names: HashMap<u32, &'a str>,
pub locals_names: HashMap<u32, HashMap<u32, &'a str>>,
pub func_names: HashMap<FuncIndex, &'a str>,
pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}

#[derive(Debug, Default)]
Expand Down Expand Up @@ -1291,18 +1291,17 @@ and for re-adding support for interface types you can see this issue:
if (index as usize) >= self.result.module.functions.len() {
continue;
}

// Store the name unconditionally, regardless of
// whether we're parsing debuginfo, since function
// names are almost always present in the
// final compilation artifact.
let index = FuncIndex::from_u32(index);
self.result
.module
.debuginfo
.name_section
.func_names
.insert(index, name.to_string());
if self.tunables.generate_native_debuginfo {
self.result
.debuginfo
.name_section
.func_names
.insert(index.as_u32(), name);
}
.insert(index, name);
}
}
wasmparser::Name::Module(module) => {
Expand Down Expand Up @@ -1332,7 +1331,7 @@ and for re-adding support for interface types you can see this issue:
.debuginfo
.name_section
.locals_names
.entry(f.indirect_index)
.entry(FuncIndex::from_u32(f.indirect_index))
.or_insert(HashMap::new())
.insert(index, name);
}
Expand Down
90 changes: 87 additions & 3 deletions crates/jit/src/instantiate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
use crate::code_memory::CodeMemory;
use crate::debug::create_gdbjit_image;
use crate::{MmapVec, ProfilingAgent};
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, bail, Context, Result};
use object::write::{Object, StandardSegment};
use object::{File, Object as _, ObjectSection, SectionKind};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
use std::ops::Range;
use std::str;
use std::sync::Arc;
use thiserror::Error;
use wasmtime_environ::{
CompileError, DefinedFuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex, Module,
ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex,
CompileError, DefinedFuncIndex, FuncIndex, FunctionInfo, InstanceSignature, InstanceTypeIndex,
Module, ModuleSignature, ModuleTranslation, ModuleTypeIndex, PrimaryMap, SignatureIndex,
StackMapInformation, Trampoline, Tunables, WasmFuncType, ELF_WASMTIME_ADDRMAP,
ELF_WASMTIME_TRAPS,
};
Expand Down Expand Up @@ -48,6 +50,23 @@ const ELF_WASM_DATA: &'static str = ".rodata.wasm";
/// decoded to get all the relevant information.
const ELF_WASMTIME_INFO: &'static str = ".wasmtime.info";

/// This is the name of the section in the final ELF image which contains a
/// concatenated list of all function names.
///
/// This section is optionally included in the final artifact depending on
/// whether the wasm module has any name data at all (or in the future if we add
/// an option to not preserve name data). This section is a concatenated list of
/// strings where `CompiledModuleInfo::func_names` stores offsets/lengths into
/// this section.
///
/// Note that the goal of this section is to avoid having to decode names at
/// module-load time if we can. Names are typically only used for debugging or
/// things like backtraces so there's no need to eagerly load all of them. By
/// storing the data in a separate section the hope is that the data, which is
/// sometimes quite large (3MB seen for spidermonkey-compiled-to-wasm), can be
/// paged in lazily from an mmap and is never paged in if we never reference it.
const ELF_NAME_DATA: &'static str = ".name.wasm";

/// An error condition while setting up a wasm instance, be it validation,
/// compilation, or instantiation.
#[derive(Error, Debug)]
Expand Down Expand Up @@ -82,6 +101,9 @@ pub struct CompiledModuleInfo {
/// Metadata about each compiled function.
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,

/// Sorted list, by function index, of names we have for this module.
func_names: Vec<FunctionName>,

/// The trampolines compiled into the text section and their start/length
/// relative to the start of the text section.
trampolines: Vec<Trampoline>,
Expand All @@ -90,6 +112,13 @@ pub struct CompiledModuleInfo {
meta: Metadata,
}

#[derive(Serialize, Deserialize)]
struct FunctionName {
idx: FuncIndex,
offset: u32,
len: u32,
}

#[derive(Serialize, Deserialize)]
struct Metadata {
/// Whether or not native debug information is available in `obj`
Expand Down Expand Up @@ -158,6 +187,32 @@ pub fn finish_compile(
obj.append_section_data(data_id, data, 1);
}

// If any names are present in the module then the `ELF_NAME_DATA` section
// is create and appended.
let mut func_names = Vec::new();
if debuginfo.name_section.func_names.len() > 0 {
let name_id = obj.add_section(
obj.segment_name(StandardSegment::Data).to_vec(),
ELF_NAME_DATA.as_bytes().to_vec(),
SectionKind::ReadOnlyData,
);
let mut sorted_names = debuginfo.name_section.func_names.iter().collect::<Vec<_>>();
sorted_names.sort_by_key(|(idx, _name)| *idx);
for (idx, name) in sorted_names {
let offset = obj.append_section_data(name_id, name.as_bytes(), 1);
let offset = match u32::try_from(offset) {
Ok(offset) => offset,
Err(_) => bail!("name section too large (> 4gb)"),
};
let len = u32::try_from(name.len()).unwrap();
func_names.push(FunctionName {
idx: *idx,
offset,
len,
});
}
}

// Update passive data offsets since they're all located after the other
// data in the module.
for (_, range) in module.passive_data_map.iter_mut() {
Expand Down Expand Up @@ -200,6 +255,7 @@ pub fn finish_compile(
module,
funcs,
trampolines,
func_names,
meta: Metadata {
native_debug_info_present: tunables.generate_native_debuginfo,
has_unparsed_debuginfo,
Expand Down Expand Up @@ -253,6 +309,8 @@ pub struct CompiledModule {
dbg_jit_registration: Option<GdbJitImageRegistration>,
/// A unique ID used to register this module with the engine.
unique_id: CompiledModuleId,
func_names: Vec<FunctionName>,
func_name_data: Range<usize>,
}

impl CompiledModule {
Expand Down Expand Up @@ -302,6 +360,15 @@ impl CompiledModule {
.context("failed to deserialize wasmtime module info")?,
};

let func_name_data = match code
.obj
.section_by_name(ELF_NAME_DATA)
.and_then(|s| s.data().ok())
{
Some(data) => subslice_range(data, code.mmap),
None => 0..0,
};

let mut ret = Self {
module: Arc::new(info.module),
funcs: info.funcs,
Expand All @@ -319,6 +386,8 @@ impl CompiledModule {
code_memory,
meta: info.meta,
unique_id: id_allocator.alloc(),
func_names: info.func_names,
func_name_data,
};
ret.register_debug_and_profiling(profiler)?;

Expand Down Expand Up @@ -391,6 +460,21 @@ impl CompiledModule {
&self.funcs
}

/// Looks up the `name` section name for the function index `idx`, if one
/// was specified in the original wasm module.
pub fn func_name(&self, idx: FuncIndex) -> Option<&str> {
// Find entry for `idx`, if present.
let i = self.func_names.binary_search_by_key(&idx, |n| n.idx).ok()?;
let name = &self.func_names[i];

// Here we `unwrap` the `from_utf8` but this can theoretically be a
// `from_utf8_unchecked` if we really wanted since this section is
// guaranteed to only have valid utf-8 data. Until it's a problem it's
// probably best to double-check this though.
let data = &self.mmap()[self.func_name_data.clone()];
Some(str::from_utf8(&data[name.offset as usize..][..name.len as usize]).unwrap())
}

/// Return a reference to a mutable module (if possible).
pub fn module_mut(&mut self) -> Option<&mut Module> {
Arc::get_mut(&mut self.module)
Expand Down
14 changes: 5 additions & 9 deletions crates/jit/src/profiling.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{demangling::demangle_function_name_or_index, CompiledModule};
use wasmtime_environ::{DefinedFuncIndex, EntityRef, Module};
use wasmtime_environ::{DefinedFuncIndex, EntityRef};

cfg_if::cfg_if! {
if #[cfg(all(feature = "jitdump", target_os = "linux"))] {
Expand Down Expand Up @@ -52,14 +52,10 @@ impl ProfilingAgent for NullProfilerAgent {
}

#[allow(dead_code)]
fn debug_name(module: &Module, index: DefinedFuncIndex) -> String {
let index = module.func_index(index);
fn debug_name(module: &CompiledModule, index: DefinedFuncIndex) -> String {
let index = module.module().func_index(index);
let mut debug_name = String::new();
demangle_function_name_or_index(
&mut debug_name,
module.func_names.get(&index).map(|s| s.as_str()),
index.index(),
)
.unwrap();
demangle_function_name_or_index(&mut debug_name, module.func_name(index), index.index())
.unwrap();
debug_name
}
2 changes: 1 addition & 1 deletion crates/jit/src/profiling/jitdump_linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ impl State {
}
} else {
let timestamp = self.get_time_stamp();
let name = super::debug_name(module.module(), idx);
let name = super::debug_name(module, idx);
self.dump_code_load_record(&name, addr, len, timestamp, pid, tid);
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/jit/src/profiling/vtune_linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ impl State {

for (idx, func) in module.finished_functions() {
let (addr, len) = unsafe { ((*func).as_ptr().cast::<u8>(), (*func).len()) };
let method_name = super::debug_name(module.module(), idx);
let method_name = super::debug_name(module, idx);
let method_id = self.get_method_id();
log::trace!(
"new function ({}) {:?}::{:?} @ {:?}\n",
Expand Down
2 changes: 1 addition & 1 deletion crates/wasmtime/src/module/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ impl GlobalRegisteredModule {
Some(FrameInfo {
module_name: module.name.clone(),
func_index: index.index() as u32,
func_name: module.func_names.get(&index).cloned(),
func_name: self.module.func_name(index).map(|s| s.to_string()),
instr,
func_start: info.start_srcloc,
symbols,
Expand Down

0 comments on commit 520a7f2

Please sign in to comment.