From 03118ad723a3b02a408445db21bb58f014b93c8f Mon Sep 17 00:00:00 2001
From: Peter Huene
Date: Wed, 10 Feb 2021 20:29:20 -0800
Subject: [PATCH] Implement user fault handling with `userfaultfd` on Linux.

This commit implements the `uffd` feature which turns on support for
utilizing the `userfaultfd` system call on Linux for the pooling instance
allocator.

By handling page faults in userland, we are able to detect guard page
accesses without having to constantly change memory page protections.
This should help reduce the number of syscalls, as well as kernel lock
contention, when many threads are allocating and deallocating instances.

Additionally, the user fault handler can lazily initialize the table and
linear memories of an instance (implementation to come).
---
 .github/workflows/main.yml                    |   1 +
 Cargo.toml                                    |   1 +
 crates/runtime/Cargo.toml                     |   6 +
 .../runtime/src/instance/allocator/pooling.rs |  78 ++-
 .../src/instance/allocator/pooling/uffd.rs    | 570 ++++++++++++++++++
 crates/runtime/src/memory.rs                  |  57 ++
 crates/wasmtime/Cargo.toml                    |   3 +
 7 files changed, 707 insertions(+), 9 deletions(-)
 create mode 100644 crates/runtime/src/instance/allocator/pooling/uffd.rs

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 02185746bcd6..38cb9584f061 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -122,6 +122,7 @@ jobs:
     - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features jitdump
     - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features cache
     - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features async
+    - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features uffd

     # Check some feature combinations of the `wasmtime-c-api` crate
     - run: cargo check --manifest-path crates/c-api/Cargo.toml --no-default-features
diff --git a/Cargo.toml b/Cargo.toml
index c4e01d0b1b5e..1044fb0825b6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -89,6 +89,7 @@ jitdump = ["wasmtime/jitdump"]
 vtune = ["wasmtime/vtune"]
 wasi-crypto = ["wasmtime-wasi-crypto"]
 wasi-nn = ["wasmtime-wasi-nn"]
+uffd = ["wasmtime/uffd"]

 # Try the experimental, work-in-progress new x86_64 backend. This is not stable
 # as of June 2020.
diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml
index e4c282fef122..a074bbca8061 100644
--- a/crates/runtime/Cargo.toml
+++ b/crates/runtime/Cargo.toml
@@ -37,3 +37,9 @@ cc = "1.0"

 [badges]
 maintenance = { status = "actively-developed" }
+
+[features]
+default = []
+
+# Enables support for userfaultfd in the pooling allocator when building on Linux
+uffd = ["userfaultfd"]
diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs
index 349fb3681dbe..154bf9cb218a 100644
--- a/crates/runtime/src/instance/allocator/pooling.rs
+++ b/crates/runtime/src/instance/allocator/pooling.rs
@@ -31,6 +31,11 @@ cfg_if::cfg_if! {
     if #[cfg(windows)] {
         mod windows;
         use windows as imp;
+    } else if #[cfg(all(feature = "uffd", target_os = "linux"))] {
+        mod uffd;
+        use uffd as imp;
+        use imp::{PageFaultHandler, reset_guard_page};
+        use std::sync::atomic::{AtomicBool, Ordering};
     } else if #[cfg(target_os = "linux")] {
         mod linux;
         use linux as imp;
@@ -335,6 +340,9 @@ impl Iterator for BasePointerIterator {
 /// structure depending on the limits used to create the pool.
 ///
 /// The pool maintains a free list for fast instance allocation.
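+///
+/// Each instance slot lives at a fixed stride from the mapping's base address
+/// (`base + index * instance_size`); the uffd handler's `AddressLocator` relies
+/// on this layout to resolve a faulting address back to its owning instance.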
+///
+/// The userfault handler relies on how instances are stored in the mapping,
+/// so make sure the uffd implementation is kept up-to-date.
 #[derive(Debug)]
 struct InstancePool {
     mapping: Mmap,
@@ -472,6 +480,10 @@ impl Drop for InstancePool {
 ///
 /// Each index into the pool returns an iterator over the base addresses
 /// of the instance's linear memories.
+///
+/// The userfault handler relies on how memories are stored in the mapping,
+/// so make sure the uffd implementation is kept up-to-date.
 #[derive(Debug)]
 struct MemoryPool {
     mapping: Mmap,
@@ -524,6 +536,9 @@ impl MemoryPool {
 ///
 /// Each index into the pool returns an iterator over the base addresses
 /// of the instance's tables.
+///
+/// The userfault handler relies on how tables are stored in the mapping,
+/// so make sure the uffd implementation is kept up-to-date.
 #[derive(Debug)]
 struct TablePool {
     mapping: Mmap,
@@ -588,6 +603,9 @@ impl TablePool {
 ///
 /// The top of the stack (starting stack pointer) is returned when a stack is allocated
 /// from the pool.
+///
+/// The userfault handler relies on how stacks are stored in the mapping,
+/// so make sure the uffd implementation is kept up-to-date.
 #[derive(Debug)]
 struct StackPool {
     mapping: Mmap,
@@ -595,6 +613,8 @@ struct StackPool {
     max_instances: usize,
     page_size: usize,
     free_list: Mutex<Vec<usize>>,
+    #[cfg(all(feature = "uffd", target_os = "linux"))]
+    faulted_guard_pages: Arc<[AtomicBool]>,
 }

 impl StackPool {
@@ -623,6 +643,11 @@ impl StackPool {
             max_instances,
             page_size,
             free_list: Mutex::new((0..max_instances).collect()),
+            #[cfg(all(feature = "uffd", target_os = "linux"))]
+            faulted_guard_pages: std::iter::repeat_with(|| false.into())
+                .take(max_instances)
+                .collect::<Vec<_>>()
+                .into(),
         })
     }
@@ -647,11 +672,25 @@ impl StackPool {
             .as_mut_ptr()
             .add((index * self.stack_size) + self.page_size);

-        // Make the stack accessible (excluding the guard page)
-        if !make_accessible(bottom_of_stack, size_without_guard) {
-            return Err(FiberStackError::Resource(
-                "failed to make instance memory accessible".into(),
-            ));
+        cfg_if::cfg_if! {
+            if #[cfg(all(feature = "uffd", target_os = "linux"))] {
+                // Check to see if a guard page needs to be reset
+                if self.faulted_guard_pages[index].swap(false, Ordering::SeqCst) {
+                    if !reset_guard_page(bottom_of_stack.sub(self.page_size), self.page_size) {
+                        return Err(FiberStackError::Resource(
+                            "failed to reset stack guard page".into(),
+                        ));
+                    }
+                }
+            } else {
+                // Make the stack accessible (excluding the guard page)
+                if !make_accessible(bottom_of_stack, size_without_guard) {
+                    return Err(FiberStackError::Resource(
+                        "failed to make instance memory accessible".into(),
+                    ));
+                }
+            }
         }

         // The top of the stack should be returned
@@ -697,6 +736,8 @@ pub struct PoolingInstanceAllocator {
     memories: mem::ManuallyDrop<MemoryPool>,
     tables: mem::ManuallyDrop<TablePool>,
     stacks: mem::ManuallyDrop<StackPool>,
+    #[cfg(all(feature = "uffd", target_os = "linux"))]
+    _fault_handler: PageFaultHandler,
 }

 impl PoolingInstanceAllocator {
@@ -744,6 +785,9 @@ impl PoolingInstanceAllocator {
         let tables = TablePool::new(&module_limits, &instance_limits)?;
         let stacks = StackPool::new(&instance_limits, stack_size)?;

+        #[cfg(all(feature = "uffd", target_os = "linux"))]
+        let _fault_handler = PageFaultHandler::new(&instances, &memories, &tables, &stacks)?;
+
         Ok(Self {
             strategy,
             module_limits,
@@ -752,6 +796,8 @@ impl PoolingInstanceAllocator {
             memories: mem::ManuallyDrop::new(memories),
             tables: mem::ManuallyDrop::new(tables),
             stacks: mem::ManuallyDrop::new(stacks),
+            #[cfg(all(feature = "uffd", target_os = "linux"))]
+            _fault_handler,
         })
     }
@@ -800,14 +846,28 @@
     ) -> Result<(), InstantiationError> {
         let module = instance.module.as_ref();

+        // Reset all guard pages before clearing the previous memories
+        #[cfg(all(feature = "uffd", target_os = "linux"))]
+        for (_, m) in instance.memories.iter() {
+            m.reset_guard_pages()
+                .map_err(InstantiationError::Resource)?;
+        }
+
         instance.memories.clear();

         for plan in
             (&module.memory_plans.values().as_slice()[module.num_imported_memories..]).iter()
         {
             instance.memories.push(
-                Memory::new_static(plan, memories.next().unwrap(), max_pages, make_accessible)
-                    .map_err(InstantiationError::Resource)?,
+                Memory::new_static(
+                    plan,
+                    memories.next().unwrap(),
+                    max_pages,
+                    make_accessible,
+                    #[cfg(all(feature = "uffd", target_os = "linux"))]
+                    reset_guard_page,
+                )
+                .map_err(InstantiationError::Resource)?,
             );
         }
@@ -826,7 +886,6 @@
         let module = instance.module.as_ref();

         instance.tables.clear();
-
         for plan in (&module.table_plans.values().as_slice()[module.num_imported_tables..]).iter()
         {
             let base = tables.next().unwrap();
@@ -852,7 +911,8 @@

 impl Drop for PoolingInstanceAllocator {
     fn drop(&mut self) {
-        // There are manually dropped for the future uffd implementation
+        // Manually drop the pools before the fault handler (if uffd is enabled)
+        // This ensures that any fault handler thread monitoring the pool memory terminates
         unsafe {
             mem::ManuallyDrop::drop(&mut self.instances);
             mem::ManuallyDrop::drop(&mut self.memories);
diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs
new file mode 100644
index 000000000000..dda577b5c14b
--- /dev/null
+++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs
@@ -0,0 +1,570 @@
+//! Implements user-mode page fault handling with the `userfaultfd` ("uffd") system call on Linux.
+//!
+//! Handling page faults for memory accesses in regions relating to WebAssembly instances
+//! enables the implementation of guard pages in user space rather than kernel space.
+//!
+//! This reduces the number of system calls and kernel locks needed to provide correct
+//! WebAssembly memory semantics.
+//!
+//! Additionally, linear memories and WebAssembly tables can be lazily initialized upon access.
+//!
+//! This feature requires Linux kernel 4.11 or newer.

+use super::{InstancePool, MemoryPool, StackPool, TablePool};
+use crate::{instance::Instance, table::max_table_element_size, Memory, Mmap};
+use std::convert::TryInto;
+use std::ptr;
+use std::sync::{
+    atomic::{AtomicBool, Ordering},
+    Arc,
+};
+use std::thread;
+use userfaultfd::{Event, FeatureFlags, IoctlFlags, Uffd, UffdBuilder};
+use wasmtime_environ::{
+    wasm::{DefinedMemoryIndex, DefinedTableIndex},
+    WASM_PAGE_SIZE,
+};
+
+pub unsafe fn make_accessible(_addr: *mut u8, _len: usize) -> bool {
+    // A no-op when userfaultfd is used
+    true
+}
+
+pub unsafe fn reset_guard_page(addr: *mut u8, len: usize) -> bool {
+    // Guard pages are READ_WRITE with uffd until faulted
+    region::protect(addr, len, region::Protection::READ_WRITE).is_ok()
+}
+
+pub unsafe fn decommit(addr: *mut u8, len: usize) {
+    // Use MADV_DONTNEED to mark the pages as missing
+    // This will cause a missing page fault for the next access on any page in the given range
+    assert_eq!(
+        libc::madvise(addr as _, len, libc::MADV_DONTNEED),
+        0,
+        "madvise failed to mark pages as missing: {}",
+        std::io::Error::last_os_error()
+    );
+}
+
+pub fn create_memory_map(_accessible_size: usize, mapping_size: usize) -> Result<Mmap, String> {
+    // Allocate a single read-write region at once
+    // As writable pages need to count towards commit charge, use MAP_NORESERVE to override.
+    // This implies that the kernel is configured to allow overcommit or else
+    // this allocation will almost certainly fail without a plethora of physical memory to back the allocation.
+    // The consequence of not reserving is that our process may segfault on any write to a memory
+    // page that cannot be backed (i.e. out of memory conditions).
+
+    if mapping_size == 0 {
+        return Ok(Mmap::new());
+    }
+
+    unsafe {
+        let ptr = libc::mmap(
+            ptr::null_mut(),
+            mapping_size,
+            libc::PROT_READ | libc::PROT_WRITE,
+            libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE,
+            -1,
+            0,
+        );
+
+        if ptr as isize == -1_isize {
+            return Err(format!(
+                "failed to allocate pool memory: {}",
+                std::io::Error::last_os_error()
+            ));
+        }
+
+        Ok(Mmap::from_raw(ptr as usize, mapping_size))
+    }
+}
+
+/// Represents a location of a page fault within monitored regions of memory.
+enum AddressLocation<'a> {
+    /// The address location is in a WebAssembly table page.
+    TablePage {
+        /// The address of the page being accessed.
+        page_addr: *mut u8,
+        /// The length of the page being accessed.
+        len: usize,
+        /// The starting element initialization index.
+        init_start: usize,
+        /// The exclusive ending element initialization index.
+        init_end: usize,
+    },
+    /// The address location is in a WebAssembly linear memory page.
+    MemoryPage {
+        /// The linear memory being accessed.
+        mem: &'a Memory,
+        /// The address of the page being accessed.
+        page_addr: *mut u8,
+        /// The length of the page being accessed.
+        len: usize,
+        /// The starting data initialization offset.
+        init_start: usize,
+        /// The exclusive ending data initialization offset.
+        init_end: usize,
+        /// Whether or not the access was inbounds (i.e. not a guard page).
+        inbounds: bool,
+    },
+    /// The address location is in an execution stack.
+    StackPage {
+        /// The address of the page being accessed.
+        page_addr: *mut u8,
+        /// The length of the page being accessed.
+        len: usize,
+        /// The index of the stack that was accessed.
+        index: usize,
+        /// Whether or not the access was inbounds (i.e. not a guard page).
+        inbounds: bool,
+    },
+}
+
+/// Used to resolve fault addresses to address locations.
+///
+/// This implementation relies heavily on how the various resource pools utilize their memory.
+///
+/// `usize` is used here instead of pointers to keep this `Send` as it gets sent to the handler thread.
+struct AddressLocator {
+    instances_start: usize,
+    instances_end: usize,
+    instance_size: usize,
+    max_instances: usize,
+    memories_start: usize,
+    memories_end: usize,
+    memory_size: usize,
+    max_memories: usize,
+    tables_start: usize,
+    tables_end: usize,
+    table_size: usize,
+    max_tables: usize,
+    stacks_start: usize,
+    stacks_end: usize,
+    stack_size: usize,
+    page_size: usize,
+}
+
+impl AddressLocator {
+    fn new(
+        instances: &InstancePool,
+        memories: &MemoryPool,
+        tables: &TablePool,
+        stacks: &StackPool,
+    ) -> Self {
+        let instances_start = instances.mapping.as_ptr() as usize;
+        let instances_end = instances_start + instances.mapping.len();
+        let memories_start = memories.mapping.as_ptr() as usize;
+        let memories_end = memories_start + memories.mapping.len();
+        let tables_start = tables.mapping.as_ptr() as usize;
+        let tables_end = tables_start + tables.mapping.len();
+        let stacks_start = stacks.mapping.as_ptr() as usize;
+        let stacks_end = stacks_start + stacks.mapping.len();
+        let stack_size = stacks.stack_size;
+
+        // Should always have instances
+        debug_assert!(instances_start != 0);
+
+        Self {
+            instances_start,
+            instances_end,
+            instance_size: instances.instance_size,
+            max_instances: instances.max_instances,
+            memories_start,
+            memories_end,
+            memory_size: memories.memory_size,
+            max_memories: memories.max_memories,
+            tables_start,
+            tables_end,
+            table_size: tables.table_size,
+            max_tables: tables.max_tables,
+            stacks_start,
+            stacks_end,
+            stack_size,
+            page_size: tables.page_size,
+        }
+    }
+
+    /// This is super-duper unsafe as it is used from the handler thread
+    /// to access instance data without any locking primitives.
+    ///
+    /// It is assumed that the thread that owns the instance being accessed is
+    /// currently suspended waiting on a fault to be handled.
+    ///
+    /// Of course a stray faulting memory access from a thread that does not own
+    /// the instance might introduce a race, but this implementation considers
+    /// such to be a serious bug.
+    ///
+    /// If the assumption holds true, accessing the instance data from the handler thread
+    /// should, in theory, be safe.
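+    ///
+    /// (As a concrete example of the arithmetic below: with `instances_start = 0x1000_0000`
+    /// and `instance_size = 0x1000`, index 3 resolves to the instance at `0x1000_3000`.)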
+    unsafe fn get_instance(&self, index: usize) -> &mut Instance {
+        debug_assert!(index < self.max_instances);
+        &mut *((self.instances_start + (index * self.instance_size)) as *mut Instance)
+    }
+
+    unsafe fn get_location(&self, addr: usize) -> Option<AddressLocation> {
+        // Check for a memory location
+        if addr >= self.memories_start && addr < self.memories_end {
+            let index = (addr - self.memories_start) / self.memory_size;
+            let wasm_mem_start = self.memories_start + (index * self.memory_size);
+            let wasm_mem_offset = addr - wasm_mem_start;
+            let wasm_page = wasm_mem_offset / (WASM_PAGE_SIZE as usize);
+            let init_start = wasm_page * (WASM_PAGE_SIZE as usize);
+
+            let mem = &self.get_instance(index / self.max_memories).memories
+                [DefinedMemoryIndex::from_u32((index % self.max_memories).try_into().unwrap())];
+
+            return Some(AddressLocation::MemoryPage {
+                mem,
+                page_addr: (wasm_mem_start + init_start) as _,
+                len: WASM_PAGE_SIZE as usize,
+                init_start,
+                init_end: init_start + (WASM_PAGE_SIZE as usize),
+                inbounds: wasm_page < mem.size() as usize,
+            });
+        }
+
+        // Check for a table location
+        if addr >= self.tables_start && addr < self.tables_end {
+            let element_size = max_table_element_size();
+            let elements_per_page = self.page_size / element_size;
+
+            let index = (addr - self.tables_start) / self.table_size;
+            let table_start = self.tables_start + (index * self.table_size);
+            let table_offset = addr - table_start;
+            let page_offset = (table_offset / self.page_size) * self.page_size;
+            let init_start = page_offset / element_size;
+
+            let instance = self.get_instance(index / self.max_tables);
+            let table = &instance.tables
+                [DefinedTableIndex::from_u32((index % self.max_tables).try_into().unwrap())];
+
+            return Some(AddressLocation::TablePage {
+                page_addr: (table_start + page_offset) as _,
+                len: self.page_size,
+                init_start,
+                init_end: std::cmp::min(
+                    table.maximum().unwrap_or(u32::MAX) as usize,
+                    init_start + elements_per_page,
+                ),
+            });
+        }
+
+        // Check for a stack location
+        if addr >= self.stacks_start && addr < self.stacks_end {
+            let index = (addr - self.stacks_start) / self.stack_size;
+            let stack_start = self.stacks_start + (index * self.stack_size);
+            let stack_offset = addr - stack_start;
+            let page_offset = (stack_offset / self.page_size) * self.page_size;
+
+            return Some(AddressLocation::StackPage {
+                page_addr: (stack_start + page_offset) as _,
+                len: self.page_size,
+                index,
+                inbounds: stack_offset >= self.page_size,
+            });
+        }
+
+        None
+    }
+}
+
+fn wake_guard_page_access(uffd: &Uffd, page_addr: *const u8, len: usize) -> Result<(), String> {
+    unsafe {
+        // Set the page to NONE to induce a SIGSEGV for the access on the next retry
+        region::protect(page_addr, len, region::Protection::NONE)
+            .map_err(|e| format!("failed to change guard page protection: {}", e))?;
+
+        uffd.wake(page_addr as _, len).map_err(|e| {
+            format!(
+                "failed to wake page at {:p} with length {}: {}",
+                page_addr, len, e
+            )
+        })?;
+
+        Ok(())
+    }
+}
+
+fn handler_thread(
+    uffd: Uffd,
+    locator: AddressLocator,
+    mut registrations: usize,
+    faulted_stack_guard_pages: Arc<[AtomicBool]>,
+) -> Result<(), String> {
+    loop {
+        match uffd.read_event().expect("failed to read event") {
+            Some(Event::Unmap { start, end }) => {
+                log::trace!("memory region unmapped: {:p}-{:p}", start, end);
+
+                let (start, end) = (start as usize, end as usize);
+
+                if (start == locator.instances_start && end == locator.instances_end)
+                    || (start == locator.memories_start && end == locator.memories_end)
+                    || (start == locator.tables_start && end == locator.tables_end)
+                    || (start == locator.stacks_start && end == locator.stacks_end)
+                {
+                    registrations -= 1;
+                    if registrations == 0 {
+                        break;
+                    }
+                } else {
+                    panic!("unexpected memory region unmapped");
+                }
+            }
+            Some(Event::Pagefault {
+                addr: access_addr, ..
+            }) => {
+                unsafe {
+                    match locator.get_location(access_addr as usize) {
+                        Some(AddressLocation::TablePage {
+                            page_addr,
+                            len,
+                            init_start,
+                            init_end,
+                        }) => {
+                            log::trace!(
+                                "handling fault in table at address {:p} on page {:p}; initializing elements [{}, {})",
+                                access_addr,
+                                page_addr,
+                                init_start,
+                                init_end
+                            );
+
+                            // TODO: copy the table initialization elements rather than zero the page
+                            // TODO: are retries necessary?
+                            uffd.zeropage(page_addr as _, len, true).map_err(|e| {
+                                format!(
+                                    "failed to zero page at {:p} with length {}: {}",
+                                    page_addr, len, e
+                                )
+                            })?;
+                        }
+                        Some(AddressLocation::MemoryPage {
+                            mem,
+                            page_addr,
+                            len,
+                            init_start,
+                            init_end,
+                            inbounds,
+                        }) => {
+                            log::trace!("handling fault in linear memory at address {:p} on page {:p}; initializing data [{:p}, {:p})",
+                                access_addr, page_addr, init_start as *const (), init_end as *const ());
+
+                            if !inbounds {
+                                // Log at trace level since memory accesses are not bounds checked
+                                log::trace!("out of bounds memory access at {:p}", access_addr);
+
+                                // Record the page fault with the linear memory
+                                // The next time the memory is grown or reused, the guard page protection
+                                // will be reset.
+                                mem.record_guard_page_fault(page_addr, len);
+                                wake_guard_page_access(&uffd, page_addr, len)?;
+                                continue;
+                            }
+
+                            // TODO: copy the memory initialization data rather than zero the page
+                            // TODO: are retries necessary?
+                            uffd.zeropage(page_addr as _, len, true).map_err(|e| {
+                                format!(
+                                    "failed to zero page at {:p} with length {}: {}",
+                                    page_addr, len, e
+                                )
+                            })?;
+                        }
+                        Some(AddressLocation::StackPage {
+                            page_addr,
+                            len,
+                            index,
+                            inbounds,
+                        }) => {
+                            log::trace!(
+                                "handling fault in stack {} at address {:p}",
+                                index,
+                                access_addr,
+                            );
+
+                            if !inbounds {
+                                // Log at trace level since stack guard pages might become a trap condition in the future
+                                log::trace!("stack overflow fault at {:p}", access_addr);
+
+                                // Mark the stack as having a faulted guard page
+                                // The next time the stack is used the guard page will be reset
+                                faulted_stack_guard_pages[index].store(true, Ordering::SeqCst);
+                                wake_guard_page_access(&uffd, page_addr, len)?;
+                                continue;
+                            }
+
+                            // Always zero stack pages
+                            // TODO: are retries necessary?
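+                            // Note: `zeropage` with `wake` set to `true` atomically maps a zeroed
+                            // page and wakes the faulting thread, so no separate `uffd.wake` call
+                            // is needed on this path.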
+                            uffd.zeropage(page_addr as _, len, true).map_err(|e| {
+                                format!(
+                                    "failed to zero page at {:p} with length {}: {}",
+                                    page_addr, len, e
+                                )
+                            })?;
+                        }
+                        None => {
+                            return Err(format!(
+                                "failed to locate fault address {:p} in registered memory regions",
+                                access_addr
+                            ));
+                        }
+                    }
+                }
+            }
+            Some(_) => continue,
+            None => break,
+        }
+    }
+
+    Ok(())
+}
+
+#[derive(Debug)]
+pub struct PageFaultHandler {
+    thread: Option<thread::JoinHandle<Result<(), String>>>,
+}
+
+impl PageFaultHandler {
+    pub(super) fn new(
+        instances: &InstancePool,
+        memories: &MemoryPool,
+        tables: &TablePool,
+        stacks: &StackPool,
+    ) -> Result<Self, String> {
+        let uffd = UffdBuilder::new()
+            .close_on_exec(true)
+            .require_features(FeatureFlags::EVENT_UNMAP)
+            .create()
+            .map_err(|e| format!("failed to create user fault descriptor: {}", e))?;
+
+        // Register the ranges with the userfault fd
+        let mut registrations = 0;
+        for (start, len) in &[
+            (memories.mapping.as_ptr() as usize, memories.mapping.len()),
+            (tables.mapping.as_ptr() as usize, tables.mapping.len()),
+            (stacks.mapping.as_ptr() as usize, stacks.mapping.len()),
+        ] {
+            if *start == 0 || *len == 0 {
+                continue;
+            }
+
+            let ioctls = uffd
+                .register(*start as _, *len)
+                .map_err(|e| format!("failed to register user fault range: {}", e))?;
+
+            if !ioctls.contains(IoctlFlags::WAKE | IoctlFlags::COPY | IoctlFlags::ZEROPAGE) {
+                return Err(format!(
+                    "required user fault ioctls not supported; found: {:?}",
+                    ioctls,
+                ));
+            }
+
+            registrations += 1;
+        }
+
+        let thread = if registrations == 0 {
+            log::trace!("user fault handling disabled as there are no regions to monitor");
+            None
+        } else {
+            log::trace!(
+                "user fault handling enabled on {} memory regions",
+                registrations
+            );
+
+            // Attempt to raise the priority of this thread (inherited by the handler
+            // thread spawned below); this may silently fail without sufficient
+            // privileges (e.g. CAP_SYS_NICE)
+            unsafe {
+                libc::nice(-3);
+            }
+
+            let locator = AddressLocator::new(&instances, &memories, &tables, &stacks);
+
+            let faulted_stack_guard_pages = stacks.faulted_guard_pages.clone();
+
+            Some(
+                thread::Builder::new()
+                    .name("page fault handler".into())
+                    .spawn(move || {
+                        handler_thread(uffd, locator, registrations, faulted_stack_guard_pages)
+                    })
+                    .map_err(|e| format!("failed to spawn page fault handler thread: {}", e))?,
+            )
+        };

+        Ok(Self { thread })
+    }
+}
+
+impl Drop for PageFaultHandler {
+    fn drop(&mut self) {
+        if let Some(thread) = self.thread.take() {
+            thread
+                .join()
+                .expect("failed to join page fault handler thread")
+                .expect("fault handler thread failed");
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::{InstanceLimits, ModuleLimits};
+
+    #[cfg(target_pointer_width = "64")]
+    #[test]
+    fn test_address_locator() {
+        let module_limits = ModuleLimits {
+            imported_functions: 0,
+            imported_tables: 0,
+            imported_memories: 0,
+            imported_globals: 0,
+            types: 0,
+            functions: 0,
+            tables: 3,
+            memories: 2,
+            globals: 0,
+            table_elements: 10,
+            memory_pages: 1,
+        };
+        let instance_limits = InstanceLimits {
+            count: 3,
+            address_space_size: (WASM_PAGE_SIZE * 2) as u64,
+        };
+
+        let instances =
+            InstancePool::new(&module_limits, &instance_limits).expect("should allocate");
+        let memories = MemoryPool::new(&module_limits, &instance_limits).expect("should allocate");
+        let tables = TablePool::new(&module_limits, &instance_limits).expect("should allocate");
+        let stacks = StackPool::new(&instance_limits, 8192).expect("should allocate");
+
+        let locator = AddressLocator::new(&instances, &memories, &tables, &stacks);
+
+        assert_eq!(locator.instances_start, instances.mapping.as_ptr() as usize);
+        assert_eq!(
+            locator.instances_end,
+            locator.instances_start + instances.mapping.len()
+        );
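+        // The pools lay slots out at host page granularity, so with these
+        // small limits each instance slot occupies a single 4096-byte page.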
+        assert_eq!(locator.instance_size, 4096);
+        assert_eq!(locator.max_instances, 3);
+
+        assert_eq!(locator.memories_start, memories.mapping.as_ptr() as usize);
+        assert_eq!(
+            locator.memories_end,
+            locator.memories_start + memories.mapping.len()
+        );
+        assert_eq!(locator.memory_size, (WASM_PAGE_SIZE * 2) as usize);
+        assert_eq!(locator.max_memories, 2);
+
+        assert_eq!(locator.tables_start, tables.mapping.as_ptr() as usize);
+        assert_eq!(
+            locator.tables_end,
+            locator.tables_start + tables.mapping.len()
+        );
+        assert_eq!(locator.table_size, 4096);
+        assert_eq!(locator.max_tables, 3);
+
+        assert_eq!(locator.stacks_start, stacks.mapping.as_ptr() as usize);
+        assert_eq!(
+            locator.stacks_end,
+            locator.stacks_start + stacks.mapping.len()
+        );
+        assert_eq!(locator.stack_size, 12288);
+    }
+}
diff --git a/crates/runtime/src/memory.rs b/crates/runtime/src/memory.rs
index 7d248b136189..baed7f775270 100644
--- a/crates/runtime/src/memory.rs
+++ b/crates/runtime/src/memory.rs
@@ -178,6 +178,10 @@ enum MemoryStorage {
         size: Cell<u32>,
         maximum: u32,
         make_accessible: unsafe fn(*mut u8, usize) -> bool,
+        #[cfg(all(feature = "uffd", target_os = "linux"))]
+        guard_page_faults: RefCell<Vec<(*mut u8, usize)>>,
+        #[cfg(all(feature = "uffd", target_os = "linux"))]
+        reset_guard_page: unsafe fn(*mut u8, usize) -> bool,
     },
     Dynamic(Box<dyn RuntimeLinearMemory>),
 }
@@ -204,6 +208,11 @@ impl Memory {
         base: *mut u8,
         maximum: u32,
         make_accessible: unsafe fn(*mut u8, usize) -> bool,
+        #[cfg(all(feature = "uffd", target_os = "linux"))] reset_guard_page: unsafe fn(
+            *mut u8,
+            usize,
+        )
+            -> bool,
     ) -> Result<Self, String> {
         if plan.memory.minimum > 0 {
             if unsafe {
@@ -219,6 +228,10 @@
                 size: Cell::new(plan.memory.minimum),
                 maximum: min(plan.memory.maximum.unwrap_or(maximum), maximum),
                 make_accessible,
+                #[cfg(all(feature = "uffd", target_os = "linux"))]
+                guard_page_faults: RefCell::new(Vec::new()),
+                #[cfg(all(feature = "uffd", target_os = "linux"))]
+                reset_guard_page,
             },
         })
     }
@@ -262,6 +275,10 @@
             return None;
         }

+        // With uffd enabled, faulted guard pages need to be reset prior to growing memory
+        #[cfg(all(feature = "uffd", target_os = "linux"))]
+        self.reset_guard_pages().ok()?;
+
         size.set(new_size);

         Some(old_size)
@@ -280,4 +297,44 @@
             MemoryStorage::Dynamic(mem) => mem.vmmemory(),
         }
     }
+
+    /// Records a faulted guard page.
+    ///
+    /// This is used to track faulted guard pages that need to be reset before growing memory.
+    #[cfg(all(feature = "uffd", target_os = "linux"))]
+    pub fn record_guard_page_fault(&self, page_addr: *mut u8, size: usize) {
+        if let MemoryStorage::Static {
+            guard_page_faults, ..
+        } = &self.storage
+        {
+            let mut faults = guard_page_faults.borrow_mut();
+            faults.push((page_addr, size));
+        }
+    }
+
+    /// Resets previously faulted guard pages.
+    ///
+    /// This is used to reset the protection of any guard pages that were previously faulted.
+    ///
+    /// Resetting the guard pages is required before growing memory.
+    #[cfg(all(feature = "uffd", target_os = "linux"))]
+    pub fn reset_guard_pages(&self) -> Result<(), String> {
+        if let MemoryStorage::Static {
+            guard_page_faults,
+            reset_guard_page,
+            ..
+        } = &self.storage
+        {
+            let mut faults = guard_page_faults.borrow_mut();
+            for (addr, len) in faults.drain(..) {
+                unsafe {
+                    if !reset_guard_page(addr, len) {
+                        return Err("failed to reset previously faulted memory guard page".into());
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
 }
diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml
index 0d3c1a9f7ba8..f5f48a43a2ed 100644
--- a/crates/wasmtime/Cargo.toml
+++ b/crates/wasmtime/Cargo.toml
@@ -73,3 +73,6 @@ experimental_x64 = ["wasmtime-jit/experimental_x64"]
 # Enables support for "async stores" as well as defining host functions as
 # `async fn` and calling functions asynchronously.
 async = ["wasmtime-fiber"]
+
+# Enables userfaultfd support in the runtime's pooling allocator when building on Linux
+uffd = ["wasmtime-runtime/uffd"]