Skip to content

Commit

Permalink
Improve MallocMemory implementation (bytecodealliance#9634)
Browse files Browse the repository at this point in the history
* Optimize memory growth in debug mode which was showing up locally in
  profiles as being particularly slow.
* Fix a bug where the `memory_reservation_for_growth` was accidentally
  initialized instead of leaving it uninitialized as intended.
  • Loading branch information
alexcrichton authored Nov 20, 2024
1 parent 5f91722 commit 27ce0ba
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 8 deletions.
6 changes: 3 additions & 3 deletions crates/environ/src/tunables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ impl Tunables {
}

/// Returns the default set of tunables for running under MIRI.
pub fn default_miri() -> Tunables {
pub const fn default_miri() -> Tunables {
Tunables {
collector: None,

Expand Down Expand Up @@ -183,7 +183,7 @@ impl Tunables {
}

/// Returns the default set of tunables for running under a 32-bit host.
pub fn default_u32() -> Tunables {
pub const fn default_u32() -> Tunables {
Tunables {
// For 32-bit we scale way down to 10MB of reserved memory. This
// impacts performance severely but allows us to have more than a
Expand All @@ -197,7 +197,7 @@ impl Tunables {
}

/// Returns the default set of tunables for running under a 64-bit host.
pub fn default_u64() -> Tunables {
pub const fn default_u64() -> Tunables {
Tunables {
// 64-bit has tons of address space to static memories can have 4gb
// address space reservations liberally by default, allowing us to
Expand Down
119 changes: 114 additions & 5 deletions crates/wasmtime/src/runtime/vm/memory/malloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ impl MallocMemory {
bail!("malloc memory cannot be used with CoW images");
}

let byte_size = minimum
let initial_allocation_byte_size = minimum
.checked_add(
tunables
.memory_reservation_for_growth
Expand All @@ -47,10 +47,14 @@ impl MallocMemory {
)
.context("memory allocation size too large")?;

let element_len = byte_size_to_element_len(byte_size);
let initial_allocation_len = byte_size_to_element_len(initial_allocation_byte_size);
let mut storage = Vec::new();
storage.try_reserve(element_len).err2anyhow()?;
storage.resize(element_len, Align16(0));
storage.try_reserve(initial_allocation_len).err2anyhow()?;

let initial_len = byte_size_to_element_len(minimum);
if initial_len > 0 {
grow_storage_to(&mut storage, initial_len);
}
Ok(MallocMemory {
base_ptr: SendSyncPtr::new(NonNull::new(storage.as_mut_ptr()).unwrap()).cast(),
storage,
Expand All @@ -74,7 +78,7 @@ impl RuntimeLinearMemory for MallocMemory {
self.storage
.try_reserve(new_element_len - self.storage.len())
.err2anyhow()?;
self.storage.resize(new_element_len, Align16(0));
grow_storage_to(&mut self.storage, new_element_len);
self.base_ptr =
SendSyncPtr::new(NonNull::new(self.storage.as_mut_ptr()).unwrap()).cast();
}
Expand All @@ -98,3 +102,108 @@ fn byte_size_to_element_len(byte_size: usize) -> usize {
// element length of our vector.
byte_size_rounded_up / align
}

/// Helper that is the equivalent of `storage.resize(new_len, Align16(0))`
/// except it's also optimized to perform well in debug mode. Just using
/// `resize` leads to a per-element iteration which can be quite slow in debug
/// mode as it's not optimized to a memcpy, so it's manually optimized here
/// instead.
fn grow_storage_to(storage: &mut Vec<Align16>, new_len: usize) {
debug_assert!(new_len > storage.len());
assert!(new_len <= storage.capacity());
let capacity_to_set = new_len - storage.len();
let slice_to_initialize = &mut storage.spare_capacity_mut()[..capacity_to_set];
let byte_size = mem::size_of_val(slice_to_initialize);

// SAFETY: The `slice_to_initialize` is guaranteed to be in the capacity of
// the vector via the slicing above, so it's all owned memory by the
// vector. Additionally the `byte_size` is the exact size of the
// `slice_to_initialize` itself, so this `memset` should be in-bounds.
// Finally the `Align16` is a simple wrapper around `u128` for which 0
// is a valid byte pattern. This should make the initial `write_bytes` safe.
//
// Afterwards the `set_len` call should also be safe because we've
// initialized the tail end of the vector with zeros so it's safe to
// consider it having a new length now.
unsafe {
core::ptr::write_bytes(slice_to_initialize.as_mut_ptr().cast::<u8>(), 0, byte_size);
storage.set_len(new_len);
}
}

#[cfg(test)]
mod tests {
use super::*;

// This is currently required by the constructor but otherwise ignored in
// the creation of a `MallocMemory`, so just have a single one used in
// tests below.
const TY: wasmtime_environ::Memory = wasmtime_environ::Memory {
idx_type: wasmtime_environ::IndexType::I32,
limits: wasmtime_environ::Limits { min: 0, max: None },
shared: false,
page_size_log2: 16,
};

// Valid tunables that can be used to create a `MallocMemory`.
const TUNABLES: Tunables = Tunables {
memory_reservation: 0,
memory_guard_size: 0,
memory_init_cow: false,
..Tunables::default_miri()
};

#[test]
fn simple() {
let mut memory = MallocMemory::new(&TY, &TUNABLES, 10).unwrap();
assert_eq!(memory.storage.len(), 1);
assert_valid(&memory);

memory.grow_to(11).unwrap();
assert_eq!(memory.storage.len(), 1);
assert_valid(&memory);

memory.grow_to(16).unwrap();
assert_eq!(memory.storage.len(), 1);
assert_valid(&memory);

memory.grow_to(17).unwrap();
assert_eq!(memory.storage.len(), 2);
assert_valid(&memory);

memory.grow_to(65).unwrap();
assert_eq!(memory.storage.len(), 5);
assert_valid(&memory);
}

#[test]
fn reservation_not_initialized() {
let tunables = Tunables {
memory_reservation_for_growth: 1 << 20,
..TUNABLES
};
let mut memory = MallocMemory::new(&TY, &tunables, 10).unwrap();
assert_eq!(memory.storage.len(), 1);
assert_eq!(
memory.storage.capacity(),
(tunables.memory_reservation_for_growth / 16) as usize + 1,
);
assert_valid(&memory);

memory.grow_to(100).unwrap();
assert_eq!(memory.storage.len(), 7);
assert_eq!(
memory.storage.capacity(),
(tunables.memory_reservation_for_growth / 16) as usize + 1,
);
assert_valid(&memory);
}

fn assert_valid(mem: &MallocMemory) {
assert_eq!(mem.storage.as_ptr().cast::<u8>(), mem.base_ptr.as_ptr());
assert!(mem.byte_len <= mem.storage.len() * 16);
for slot in mem.storage.iter() {
assert_eq!(slot.0, 0);
}
}
}

0 comments on commit 27ce0ba

Please sign in to comment.