diff --git a/Cargo.lock b/Cargo.lock index 519d6bc0336..4543674013e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -767,6 +767,15 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "memfd" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2cffa4ad52c6f791f4f8b15f0c05f9824b2ced1160e88cc393d64fff9a8ac64" +dependencies = [ + "rustix 0.38.17", +] + [[package]] name = "micro_http" version = "0.1.0" @@ -1412,6 +1421,7 @@ dependencies = [ "libc", "linux-loader", "log", + "memfd", "micro_http", "proptest", "seccompiler", diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index ea9e0dc60aa..3050aa7dc97 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -18,6 +18,7 @@ kvm-ioctls = "0.15.0" lazy_static = "1.4.0" libc = "0.2.117" linux-loader = "0.9.0" +memfd = "0.6.3" serde = { version = "1.0.136", features = ["derive", "rc"] } semver = { version = "1.0.17", features = ["serde"] } serde_json = "1.0.78" diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 0c834366276..d26070a4442 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -265,9 +265,10 @@ pub fn build_microvm_for_boot( .ok_or(MissingKernelConfig)?; let track_dirty_pages = vm_resources.track_dirty_pages(); - let guest_memory = - GuestMemoryMmap::with_size(vm_resources.vm_config.mem_size_mib, track_dirty_pages) - .map_err(StartMicrovmError::GuestMemory)?; + let memfd = crate::vstate::memory::create_memfd(vm_resources.vm_config.mem_size_mib) + .map_err(StartMicrovmError::GuestMemory)?; + let guest_memory = GuestMemoryMmap::with_file(memfd.as_file(), track_dirty_pages) + .map_err(StartMicrovmError::GuestMemory)?; let entry_addr = load_kernel(boot_config, &guest_memory)?; let initrd = load_initrd_from_config(boot_config, &guest_memory)?; // Clone the command-line so that a failed boot doesn't pollute the 
original. diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 7567ca93353..e461d43e224 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -37,7 +37,7 @@ const GUARD_PAGE_COUNT: usize = 1; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MemoryError { /// Cannot access file: {0:?} - FileHandle(std::io::Error), + FileError(std::io::Error), /// Cannot create memory: {0:?} CreateMemory(VmMemoryError), /// Cannot create memory region: {0:?} @@ -50,6 +50,10 @@ pub enum MemoryError { MmapRegionError(MmapRegionError), /// Cannot create guest memory: {0} VmMemoryError(VmMemoryError), + /// Cannot create memfd: {0:?} + Memfd(memfd::Error), + /// Cannot resize memfd file: {0:?} + MemfdSetLen(std::io::Error), } /// Defines the interface for snapshotting memory. @@ -57,6 +61,9 @@ pub trait GuestMemoryExtension where Self: Sized, { + /// Creates a GuestMemoryMmap with guard pages, backed by `file` and sized to the file's length. + fn with_file(file: &File, track_dirty_pages: bool) -> Result<Self, MemoryError>; + /// Creates a GuestMemoryMmap with `size` in MiB and guard pages. fn with_size(size: usize, track_dirty_pages: bool) -> Result<Self, MemoryError>; @@ -119,7 +126,37 @@ pub struct GuestMemoryState { } impl GuestMemoryExtension for GuestMemoryMmap { - /// Creates a GuestMemoryMmap with `size` in MiB and guard pages. + /// Creates a GuestMemoryMmap with guard pages, backed by `file` and sized to the file's length. 
+ fn with_file(file: &File, track_dirty_pages: bool) -> Result<Self, MemoryError> { + let metadata = file.metadata().map_err(MemoryError::FileError)?; + let mem_size = u64_to_usize(metadata.len()); + let regions = crate::arch::arch_memory_regions(mem_size); + + let prot = libc::PROT_READ | libc::PROT_WRITE; + let flags = libc::MAP_NORESERVE | libc::MAP_SHARED; + + let mut offset: u64 = 0; + let regions = regions + .iter() + .map(|(guest_address, region_size)| { + let file_clone = file.try_clone().map_err(MemoryError::FileError)?; + let file_offset = FileOffset::new(file_clone, offset); + offset += *region_size as u64; + let region = build_guarded_region( + Some(&file_offset), + *region_size, + prot, + flags, + track_dirty_pages, + )?; + GuestRegionMmap::new(region, *guest_address).map_err(MemoryError::VmMemoryError) + }) + .collect::<Result<Vec<_>, MemoryError>>()?; + + GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) + } + + /// Creates a GuestMemoryMmap with `size` in MiB and guard pages backed by anonymous memory. fn with_size(size: usize, track_dirty_pages: bool) -> Result<Self, MemoryError> { let mem_size = size << 20; let regions = crate::arch::arch_memory_regions(mem_size); @@ -127,7 +164,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { Self::from_raw_regions(&regions, track_dirty_pages) } - /// Creates a GuestMemoryMmap from raw regions with guard pages. + /// Creates a GuestMemoryMmap from raw regions with guard pages backed by anonymous memory. fn from_raw_regions( regions: &[(GuestAddress, usize)], track_dirty_pages: bool, @@ -147,7 +184,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) } - /// Creates a GuestMemoryMmap from raw regions with no guard pages. + /// Creates a GuestMemoryMmap from raw regions with no guard pages backed by anonymous memory. 
fn from_raw_regions_unguarded( regions: &[(GuestAddress, usize)], track_dirty_pages: bool, @@ -195,7 +232,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { }) }) .collect::<Result<Vec<_>, std::io::Error>>() - .map_err(MemoryError::FileHandle)?; + .map_err(MemoryError::FileError)?; let prot = libc::PROT_READ | libc::PROT_WRITE; let flags = libc::MAP_NORESERVE | libc::MAP_PRIVATE; @@ -322,6 +359,33 @@ impl GuestMemoryExtension for GuestMemoryMmap { } } +/// Creates a memfd file with the `size` in MiB. +pub fn create_memfd(size: usize) -> Result<memfd::Memfd, MemoryError> { + let mem_size = size << 20; + // Create a memfd. + let opts = memfd::MemfdOptions::default().allow_sealing(true); + let mem_file = opts.create("guest_mem").map_err(MemoryError::Memfd)?; + + // Resize to guest mem size. + mem_file + .as_file() + .set_len(mem_size as u64) + .map_err(MemoryError::MemfdSetLen)?; + + // Add seals to prevent further resizing. + let mut seals = memfd::SealsHashSet::new(); + seals.insert(memfd::FileSeal::SealShrink); + seals.insert(memfd::FileSeal::SealGrow); + mem_file.add_seals(&seals).map_err(MemoryError::Memfd)?; + + // Prevent further sealing changes. + mem_file + .add_seal(memfd::FileSeal::SealSeal) + .map_err(MemoryError::Memfd)?; + + Ok(mem_file) +} + /// Build a `MmapRegion` surrounded by guard pages. 
/// /// Initially, we map a `PROT_NONE` guard region of size: @@ -844,4 +908,19 @@ mod tests { assert_eq!(expected_first_region, diff_file_content); } } + + #[test] + fn test_create_memfd() { + let size = 1; + let size_mb = 1 << 20; + + let memfd = create_memfd(size).unwrap(); + + assert_eq!(memfd.as_file().metadata().unwrap().len(), size_mb); + assert!(memfd.as_file().set_len(0x69).is_err()); + + let mut seals = memfd::SealsHashSet::new(); + seals.insert(memfd::FileSeal::SealGrow); + assert!(memfd.add_seals(&seals).is_err()); + } } diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 9dae036db3e..4632076316c 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -389,8 +389,7 @@ def test_api_machine_config(test_microvm_with_api): test_microvm.api.machine_config.patch(mem_size_mib=bad_size) fail_msg = re.escape( - "Invalid Memory Configuration: MmapRegion(Mmap(Os { code: " - "12, kind: OutOfMemory, message: Out of memory }))" + "Invalid Memory Configuration: MemfdSetLen(Custom { kind: InvalidInput, error: TryFromIntError(()) })" ) with pytest.raises(RuntimeError, match=fail_msg): test_microvm.start()