diff --git a/CHANGELOG.md b/CHANGELOG.md index 974cda4d45f..8ae67f9c8f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,10 @@ ### Added - [#4045](https://github.com/firecracker-microvm/firecracker/pull/4045) + and [#4075](https://github.com/firecracker-microvm/firecracker/pull/4075): Added `snapshot-editor` tool for modifications of snapshot files. + It allows for rebasing of memory snapshot files, printing and + removing aarch64 registers from the vmstate and obtaining snapshot version. - [#3967](https://github.com/firecracker-microvm/firecracker/pull/3967/): Added new fields to the custom CPU templates. (aarch64 only) `vcpu_features` field allows modifications of vCPU features enabled during vCPU diff --git a/Cargo.lock b/Cargo.lock index 8fadcc697e5..7ae080f033a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1207,6 +1207,7 @@ dependencies = [ "libc", "snapshot", "thiserror", + "utils", "vmm", ] diff --git a/docs/snapshotting/snapshot-editor.md b/docs/snapshotting/snapshot-editor.md index 85028d3a5c8..89785c697c9 100644 --- a/docs/snapshotting/snapshot-editor.md +++ b/docs/snapshotting/snapshot-editor.md @@ -10,11 +10,39 @@ Firecracker snapshot consists of 2 files: devices states etc. - `memory` file: file with guest memory. -`snapshot-editor` (currently) allows to modify only `vmstate` files only -on aarch64 platform. - ## Usage +### `edit-memory` command + +#### `rebase` subcommand + +This command is used to merge a `diff` snapshot memory file on +top of a base memory file. + +**Note** +You can also use `rebase-snap` tool for this. + +Arguments: + +- `MEMORY_PATH` - path to the `memory` file +- `DIFF_PATH` - path to the `diff` file + +Usage: + +```bash +snapshot-editor edit-memory rebase \ + --memory-path \ + --diff-path +``` + +Example: + +```bash +snapshot-editor edit-memory rebase \ + --memory-path ./memory_file \ + --diff-path ./diff_file +``` + ### `edit-vmstate` command #### `remove-regs` subcommand (aarch64 only) diff --git a/docs/snapshotting/snapshot-support.md b/docs/snapshotting/snapshot-support.md index fd07bed014f..2d16bdd6461 100644 --- a/docs/snapshotting/snapshot-support.md +++ b/docs/snapshotting/snapshot-support.md @@ -244,12 +244,23 @@ created by a subsequent `/snapshot/create` API call. The order in which the snapshots were created matters and they should be merged in the same order in which they were created. To merge a `diff` snapshot memory file on top of a base, users should copy its content over the base. This can be done -using the `rebase-snap` tool provided with the firecracker release: +using the `rebase-snap` or `snapshot-editor` tools provided with the +firecracker release: + +`rebase-snap` example: ```bash rebase-snap --base-file path/to/base --diff-file path/to/layer ``` +`snapshot-editor` example: + +```bash +snapshot-editor edit-memory rebase \ + --memory-path path/to/base \ + --diff-path path/to/layer +``` + After executing the command above, the base would be a resumable snapshot memory file describing the state of the memory at the moment of creation of the layer. More layers which were created later can be merged on top of this base. diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index cb3e70a6919..9dfa9e2b5f1 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -17,3 +17,5 @@ libc = "0.2.147" snapshot = { path = "../snapshot" } thiserror = "1.0.44" vmm = { path = "../vmm" } + +fc_utils = { package = "utils", path = "../utils" } diff --git a/src/snapshot-editor/src/edit_memory.rs b/src/snapshot-editor/src/edit_memory.rs new file mode 100644 index 00000000000..c4675aa5fec --- /dev/null +++ b/src/snapshot-editor/src/edit_memory.rs @@ -0,0 +1,257 @@ +// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::fs::OpenOptions; +use std::io::{Seek, SeekFrom}; +use std::os::fd::AsRawFd; +use std::path::PathBuf; + +use clap::Subcommand; +use fc_utils::seek_hole::SeekHole; + +#[derive(Debug, thiserror::Error)] +pub enum EditMemoryError { + #[error("Could not open memory file: {0}")] + OpenMemoryFile(std::io::Error), + #[error("Could not open diff file: {0}")] + OpenDiffFile(std::io::Error), + #[error("Failed to seek data in diff file: {0}")] + SeekDataDiff(std::io::Error), + #[error("Failed to seek hole in diff file: {0}")] + SeekHoleDiff(std::io::Error), + #[error("Failed to get metadata for diff file: {0}")] + MetadataDiff(std::io::Error), + #[error("Failed to seek in memory file: {0}")] + SeekMemory(std::io::Error), + #[error("Failed to send the file: {0:?}")] + SendFile(std::io::Error), +} + +#[derive(Debug, Subcommand)] +pub enum EditMemorySubCommand { + /// Remove registers from vcpu states. + Rebase { + /// Path to the memory file. + #[arg(short, long)] + memory_path: PathBuf, + /// Path to the diff file. + #[arg(short, long)] + diff_path: PathBuf, + }, +} + +pub fn edit_memory_command(command: EditMemorySubCommand) -> Result<(), EditMemoryError> { + match command { + EditMemorySubCommand::Rebase { + memory_path, + diff_path, + } => rebase(memory_path, diff_path)?, + } + Ok(()) +} + +fn rebase(memory_path: PathBuf, diff_path: PathBuf) -> Result<(), EditMemoryError> { + let mut base_file = OpenOptions::new() + .write(true) + .open(memory_path) + .map_err(EditMemoryError::OpenMemoryFile)?; + + let mut diff_file = OpenOptions::new() + .read(true) + .open(diff_path) + .map_err(EditMemoryError::OpenDiffFile)?; + + let mut cursor: u64 = 0; + while let Some(block_start) = diff_file + .seek_data(cursor) + .map_err(EditMemoryError::SeekDataDiff)? + { + cursor = block_start; + let block_end = match diff_file + .seek_hole(block_start) + .map_err(EditMemoryError::SeekHoleDiff)? + { + Some(hole_start) => hole_start, + None => diff_file + .metadata() + .map_err(EditMemoryError::MetadataDiff)? + .len(), + }; + + while cursor < block_end { + base_file + .seek(SeekFrom::Start(cursor)) + .map_err(EditMemoryError::SeekMemory)?; + + // SAFETY: Safe because the parameters are valid. + let num_transferred_bytes = unsafe { + libc::sendfile64( + base_file.as_raw_fd(), + diff_file.as_raw_fd(), + (&mut cursor as *mut u64).cast::(), + block_end.saturating_sub(cursor) as usize, + ) + }; + if num_transferred_bytes < 0 { + return Err(EditMemoryError::SendFile(std::io::Error::last_os_error())); + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::fs::File; + use std::io::{Seek, SeekFrom, Write}; + use std::os::unix::fs::FileExt; + + use fc_utils::{rand, tempfile}; + + use super::*; + + fn check_file_content(file: &File, expected_content: &[u8]) { + assert_eq!( + file.metadata().unwrap().len() as usize, + expected_content.len() + ); + let mut buf = vec![0u8; expected_content.len()]; + file.read_exact_at(buf.as_mut_slice(), 0).unwrap(); + assert_eq!(&buf, expected_content); + } + + #[test] + fn test_rebase_empty_files() { + let base = tempfile::TempFile::new().unwrap(); + let diff = tempfile::TempFile::new().unwrap(); + + let base_file = base.as_file(); + + let base_path = base.as_path().to_path_buf(); + let diff_path = diff.as_path().to_path_buf(); + + // Empty files + rebase(base_path, diff_path).unwrap(); + assert_eq!(base_file.metadata().unwrap().len(), 0); + } + + #[test] + fn test_rebase_empty_diff() { + let base = tempfile::TempFile::new().unwrap(); + let diff = tempfile::TempFile::new().unwrap(); + + let mut base_file = base.as_file(); + let diff_file = diff.as_file(); + + let base_path = base.as_path().to_path_buf(); + let diff_path = diff.as_path().to_path_buf(); + + let initial_base_file_content = rand::rand_bytes(50000); + base_file.write_all(&initial_base_file_content).unwrap(); + + // Diff file that has only holes + diff_file + .set_len(initial_base_file_content.len() as u64) + .unwrap(); + rebase(base_path, diff_path).unwrap(); + check_file_content(base_file, &initial_base_file_content); + } + + #[test] + fn test_rebase_full_diff() { + let base = tempfile::TempFile::new().unwrap(); + let diff = tempfile::TempFile::new().unwrap(); + + let base_file = base.as_file(); + let mut diff_file = diff.as_file(); + + let base_path = base.as_path().to_path_buf(); + let diff_path = diff.as_path().to_path_buf(); + + // Diff file that has only data + let diff_data = rand::rand_bytes(50000); + diff_file.write_all(&diff_data).unwrap(); + rebase(base_path, diff_path).unwrap(); + check_file_content(base_file, &diff_data); + } + + #[test] + fn test_rebase() { + // The filesystem punches holes only for blocks >= 4096. + // It doesn't make sense to test for smaller ones. + let block_sizes: &[usize] = &[4096, 8192]; + for &block_size in block_sizes { + let mut expected_result = vec![]; + + let base = tempfile::TempFile::new().unwrap(); + let diff = tempfile::TempFile::new().unwrap(); + + let mut base_file = base.as_file(); + let mut diff_file = diff.as_file(); + + let base_path = base.as_path().to_path_buf(); + let diff_path = diff.as_path().to_path_buf(); + + // 1. Populated block both in base and diff file + // block: [ ] + // diff: [ ] + // expected: [d] + let base_block = rand::rand_bytes(block_size); + base_file.write_all(&base_block).unwrap(); + let diff_block = rand::rand_bytes(block_size); + diff_file.write_all(&diff_block).unwrap(); + expected_result.extend(diff_block); + + // 2. Populated block in base file, hole in diff file + // block: [ ] [ ] + // diff: [ ] ___ + // expected: [d] [b] + let base_block = rand::rand_bytes(block_size); + base_file.write_all(&base_block).unwrap(); + diff_file + .seek(SeekFrom::Current(block_size as i64)) + .unwrap(); + expected_result.extend(base_block); + + // 3. Populated block in base file, zeroes block in diff file + // block: [ ] [ ] [ ] + // diff: [ ] ___ [0] + // expected: [d] [b] [d] + let base_block = rand::rand_bytes(block_size); + base_file.write_all(&base_block).unwrap(); + let diff_block = vec![0u8; block_size]; + diff_file.write_all(&diff_block).unwrap(); + expected_result.extend(diff_block); + + // Rebase and check the result + rebase(base_path.clone(), diff_path.clone()).unwrap(); + check_file_content(base_file, &expected_result); + + // 4. The diff file is bigger + // block: [ ] [ ] [ ] + // diff: [ ] ___ [0] [ ] + // expected: [d] [b] [d] [d] + let diff_block = rand::rand_bytes(block_size); + diff_file.write_all(&diff_block).unwrap(); + expected_result.extend(diff_block); + // Rebase and check the result + rebase(base_path.clone(), diff_path.clone()).unwrap(); + check_file_content(base_file, &expected_result); + + // 5. The base file is bigger + // block: [ ] [ ] [ ] [ ] [ ] + // diff: [ ] ___ [0] [ ] + // expected: [d] [b] [d] [d] [b] + let base_block = rand::rand_bytes(block_size); + // Adding to the base file 2 times because + // it is 1 block smaller then diff right now. + base_file.write_all(&base_block).unwrap(); + base_file.write_all(&base_block).unwrap(); + expected_result.extend(base_block); + // Rebase and check the result + rebase(base_path, diff_path).unwrap(); + check_file_content(base_file, &expected_result); + } + } +} diff --git a/src/snapshot-editor/src/main.rs b/src/snapshot-editor/src/main.rs index 178ca440cc5..de508c4ced0 100644 --- a/src/snapshot-editor/src/main.rs +++ b/src/snapshot-editor/src/main.rs @@ -3,17 +3,21 @@ use clap::{Parser, Subcommand}; +mod edit_memory; #[cfg(target_arch = "aarch64")] mod edit_vmstate; mod info; mod utils; +use edit_memory::{edit_memory_command, EditMemoryError, EditMemorySubCommand}; #[cfg(target_arch = "aarch64")] use edit_vmstate::{edit_vmstate_command, EditVmStateError, EditVmStateSubCommand}; use info::{info_vmstate_command, InfoVmStateError, InfoVmStateSubCommand}; #[derive(Debug, thiserror::Error)] enum SnapEditorError { + #[error("Error during editing memory file: {0}")] + EditMemory(#[from] EditMemoryError), #[cfg(target_arch = "aarch64")] #[error("Error during editing vmstate file: {0}")] EditVmState(#[from] EditVmStateError), @@ -30,6 +34,8 @@ struct Cli { #[derive(Debug, Subcommand)] enum Command { + #[command(subcommand)] + EditMemory(EditMemorySubCommand), #[cfg(target_arch = "aarch64")] #[command(subcommand)] EditVmstate(EditVmStateSubCommand), @@ -41,6 +47,7 @@ fn main_exec() -> Result<(), SnapEditorError> { let cli = Cli::parse(); match cli.command { + Command::EditMemory(command) => edit_memory_command(command)?, #[cfg(target_arch = "aarch64")] Command::EditVmstate(command) => edit_vmstate_command(command)?, Command::InfoVmstate(command) => info_vmstate_command(command)?, diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 26a87533737..d69ddcf90ad 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -78,11 +78,15 @@ def is_diff(self) -> bool: """Is this a DIFF snapshot?""" return self.snapshot_type == SnapshotType.DIFF - def rebase_snapshot(self, base): + def rebase_snapshot(self, base, use_snapshot_editor=False): """Rebases current incremental snapshot onto a specified base layer.""" if not self.is_diff: raise ValueError("Can only rebase DIFF snapshots") - build_tools.run_rebase_snap_bin(base.mem, self.mem) + if use_snapshot_editor: + build_tools.run_snap_editor_rebase(base.mem, self.mem) + else: + build_tools.run_rebase_snap_bin(base.mem, self.mem) + new_args = self.__dict__ | {"mem": base.mem} return Snapshot(**new_args) diff --git a/tests/host_tools/cargo_build.py b/tests/host_tools/cargo_build.py index dffe0479756..778ab26200b 100644 --- a/tests/host_tools/cargo_build.py +++ b/tests/host_tools/cargo_build.py @@ -123,6 +123,25 @@ def run_seccompiler_bin(bpf_path, json_path=defs.SECCOMP_JSON_DIR, basic=False): assert rc == 0 +@with_filelock +def run_snap_editor_rebase(base_snap, diff_snap): + """ + Run apply_diff_snap. + + :param base_snap: path to the base snapshot mem file + :param diff_snap: path to diff snapshot mem file + """ + cargo_target = "{}-unknown-linux-musl".format(platform.machine()) + + rc, _, _ = cargo( + "run", + f"-p snapshot-editor --target {cargo_target}", + f"edit-memory rebase --memory-path {base_snap} --diff-path {diff_snap}", + ) + + assert rc == 0 + + @with_filelock def run_rebase_snap_bin(base_snap, diff_snap): """ diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py index 8220b8e93f6..de43f0b83a1 100644 --- a/tests/integration_tests/functional/test_snapshot_basic.py +++ b/tests/integration_tests/functional/test_snapshot_basic.py @@ -40,6 +40,7 @@ def _get_guest_drive_size(ssh_connection, guest_dev_name="/dev/vdb"): # - Microvm: 2vCPU with 512 MB RAM # TODO: Multiple microvm sizes must be tested in the async pipeline. @pytest.mark.parametrize("snapshot_type", [SnapshotType.DIFF, SnapshotType.FULL]) +@pytest.mark.parametrize("use_snapshot_editor", [False, True]) def test_5_snapshots( bin_vsock_path, tmp_path, @@ -47,6 +48,7 @@ def test_5_snapshots( guest_kernel, rootfs, snapshot_type, + use_snapshot_editor, ): """ Create and load 5 snapshots. @@ -104,7 +106,9 @@ def test_5_snapshots( # current layer. if snapshot.is_diff: logger.info("Base: %s, Layer: %s", base_snapshot.mem, snapshot.mem) - snapshot = snapshot.rebase_snapshot(base_snapshot) + snapshot = snapshot.rebase_snapshot( + base_snapshot, use_snapshot_editor=use_snapshot_editor + ) # Update the base for next iteration. base_snapshot = snapshot