From b77c4915e8277529e6172cf4c5ece84811f2ff49 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Fri, 1 Dec 2023 15:24:31 -0500 Subject: [PATCH] WIP: Use `podman pull` to fetch containers See https://github.com/containers/bootc/issues/147#issuecomment-1828751377 With this bootc starts to really gain support for a different backend than ostree. Here we basically just fork off `podman pull` to fetch container images into an *alternative root* in `/ostree/container-storage` (because otherwise basic things like `podman image prune` would delete the OS image). This is quite distinct from our use of `skopeo` in the ostree-ext project because suddenly now we gain support for things implemented in the containers/storage library like `zstd:chunked` and OCI crypt. *However*...today we still need to generate a final flattened filesystem tree (and an ostree commit) in order to maintain compatibility with stuff in rpm-ostree. (A corollary to this is we're not booting into a `podman mount` overlayfs stack) Related to this, we also need to handle SELinux labeling. Hence, we implement "layer squashing", and then do some final "postprocessing" on the resulting image matching the same logic that's done in ostree-ext such as `etc -> usr/etc` and handling `/var`. Note this also really wants https://github.com/ostreedev/ostree/pull/3106 to avoid duplicating disk space. 
Signed-off-by: Colin Walters --- lib/src/cli.rs | 30 +++- lib/src/deploy.rs | 55 ++++++- lib/src/lib.rs | 5 +- lib/src/ostree_authfile.rs | 72 +++++++++ lib/src/podman.rs | 148 +++++++++++++++++++ lib/src/podman_ostree.rs | 296 +++++++++++++++++++++++++++++++++++++ lib/src/spec.rs | 24 +++ lib/src/status.rs | 90 +++++++---- lib/src/utils.rs | 94 ++++++++++++ 9 files changed, 779 insertions(+), 35 deletions(-) create mode 100644 lib/src/ostree_authfile.rs create mode 100644 lib/src/podman_ostree.rs diff --git a/lib/src/cli.rs b/lib/src/cli.rs index 842ae6ae..c1351260 100644 --- a/lib/src/cli.rs +++ b/lib/src/cli.rs @@ -84,6 +84,10 @@ pub(crate) struct SwitchOpts { /// Target image to use for the next boot. pub(crate) target: String, + + /// The storage backend + #[clap(long, hide = true)] + pub(crate) backend: Option, } /// Perform an edit operation @@ -158,6 +162,15 @@ pub(crate) enum TestingOpts { }, } +/// Options for internal testing +#[derive(Debug, clap::Parser)] +pub(crate) struct InternalPodmanOpts { + #[clap(long, value_parser, default_value = "/")] + root: Utf8PathBuf, + #[clap(trailing_var_arg = true, allow_hyphen_values = true)] + args: Vec, +} + /// Deploy and transactionally in-place with bootable container images. /// /// The `bootc` project currently uses ostree-containers as a backend @@ -226,6 +239,9 @@ pub(crate) enum Opt { #[clap(trailing_var_arg = true, allow_hyphen_values = true)] args: Vec, }, + /// Execute podman in our internal configuration + #[clap(hide = true)] + InternalPodman(InternalPodmanOpts), /// Internal integration testing helpers. 
#[clap(hide(true), subcommand)] #[cfg(feature = "internal-testing-api")] @@ -361,7 +377,7 @@ async fn upgrade(opts: UpgradeOpts) -> Result<()> { } } } else { - let fetched = crate::deploy::pull(sysroot, imgref, opts.quiet).await?; + let fetched = crate::deploy::pull(sysroot, spec.backend, imgref, opts.quiet).await?; let staged_digest = staged_image.as_ref().map(|s| s.image_digest.as_str()); let fetched_digest = fetched.manifest_digest.as_str(); tracing::debug!("staged: {staged_digest:?}"); @@ -446,6 +462,7 @@ async fn switch(opts: SwitchOpts) -> Result<()> { let new_spec = { let mut new_spec = host.spec.clone(); new_spec.image = Some(target.clone()); + new_spec.backend = opts.backend.unwrap_or_default(); new_spec }; @@ -455,7 +472,7 @@ async fn switch(opts: SwitchOpts) -> Result<()> { } let new_spec = RequiredHostSpec::from_spec(&new_spec)?; - let fetched = crate::deploy::pull(sysroot, &target, opts.quiet).await?; + let fetched = crate::deploy::pull(sysroot, new_spec.backend, &target, opts.quiet).await?; if !opts.retain { // By default, we prune the previous ostree ref so it will go away after later upgrades @@ -497,7 +514,8 @@ async fn edit(opts: EditOpts) -> Result<()> { return Ok(()); } let new_spec = RequiredHostSpec::from_spec(&new_host.spec)?; - let fetched = crate::deploy::pull(sysroot, new_spec.image, opts.quiet).await?; + let fetched = + crate::deploy::pull(sysroot, new_spec.backend, new_spec.image, opts.quiet).await?; // TODO gc old layers here @@ -550,6 +568,12 @@ async fn run_from_opt(opt: Opt) -> Result<()> { crate::hostexec::exec_in_host_mountns(args.as_slice()) } Opt::Status(opts) => super::status::status(opts).await, + Opt::InternalPodman(args) => { + prepare_for_write().await?; + // This also remounts writable + let _sysroot = get_locked_sysroot().await?; + crate::podman::exec(args.root.as_path(), args.args.as_slice()) + } #[cfg(feature = "internal-testing-api")] Opt::InternalTests(opts) => crate::privtests::run(opts).await, #[cfg(feature = 
"docgen")] diff --git a/lib/src/deploy.rs b/lib/src/deploy.rs index 11aca2a7..f1415fd0 100644 --- a/lib/src/deploy.rs +++ b/lib/src/deploy.rs @@ -8,15 +8,18 @@ use anyhow::{Context, Result}; use cap_std::fs::{Dir, MetadataExt}; use cap_std_ext::cap_std; use cap_std_ext::dirext::CapStdExtDirExt; +use chrono::DateTime; use fn_error_context::context; use ostree::{gio, glib}; use ostree_container::OstreeImageReference; use ostree_ext::container as ostree_container; use ostree_ext::container::store::PrepareResult; +use ostree_ext::oci_spec; use ostree_ext::ostree; use ostree_ext::ostree::Deployment; use ostree_ext::sysroot::SysrootLock; +use crate::spec::Backend; use crate::spec::HostSpec; use crate::spec::ImageReference; use crate::status::labels_of_config; @@ -30,11 +33,14 @@ const BOOTC_DERIVED_KEY: &str = "bootc.derived"; /// Variant of HostSpec but required to be filled out pub(crate) struct RequiredHostSpec<'a> { pub(crate) image: &'a ImageReference, + pub(crate) backend: Backend, } /// State of a locally fetched image pub(crate) struct ImageState { + pub(crate) backend: Backend, pub(crate) manifest_digest: String, + pub(crate) created: Option>, pub(crate) version: Option, pub(crate) ostree_commit: String, } @@ -47,7 +53,10 @@ impl<'a> RequiredHostSpec<'a> { .image .as_ref() .ok_or_else(|| anyhow::anyhow!("Missing image in specification"))?; - Ok(Self { image }) + Ok(Self { + image, + backend: spec.backend, + }) } } @@ -55,8 +64,17 @@ impl From for ImageState { fn from(value: ostree_container::store::LayeredImageState) -> Self { let version = value.version().map(|v| v.to_owned()); let ostree_commit = value.get_commit().to_owned(); + let labels = crate::status::labels_of_config(&value.configuration); + let created = labels + .and_then(|l| { + l.get(oci_spec::image::ANNOTATION_CREATED) + .map(|s| s.as_str()) + }) + .and_then(crate::status::try_deserialize_timestamp); Self { + backend: Backend::OstreeContainer, manifest_digest: value.manifest_digest, + created, 
version, ostree_commit, } @@ -69,8 +87,14 @@ impl ImageState { &self, repo: &ostree::Repo, ) -> Result> { - ostree_container::store::query_image_commit(repo, &self.ostree_commit) - .map(|v| Some(v.manifest)) + match self.backend { + Backend::OstreeContainer => { + ostree_container::store::query_image_commit(repo, &self.ostree_commit) + .map(|v| Some(v.manifest)) + } + // TODO: Figure out if we can get the OCI manifest from podman + Backend::Container => Ok(None), + } } } @@ -114,6 +138,31 @@ pub(crate) fn check_bootc_label(config: &ostree_ext::oci_spec::image::ImageConfi /// Wrapper for pulling a container image, wiring up status output. #[context("Pulling")] pub(crate) async fn pull( + sysroot: &SysrootLock, + backend: Backend, + imgref: &ImageReference, + quiet: bool, +) -> Result> { + match backend { + Backend::OstreeContainer => pull_via_ostree(sysroot, imgref, quiet).await, + Backend::Container => pull_via_podman(sysroot, imgref, quiet).await, + } +} + +/// Wrapper for pulling a container image, wiring up status output. 
+async fn pull_via_podman( + sysroot: &SysrootLock, + imgref: &ImageReference, + quiet: bool, +) -> Result> { + let rootfs = &Dir::reopen_dir(&crate::utils::sysroot_fd_borrowed(sysroot))?; + let fetched_imageid = crate::podman::podman_pull(rootfs, imgref, quiet).await?; + crate::podman_ostree::commit_image_to_ostree(sysroot, &fetched_imageid) + .await + .map(Box::new) +} + +async fn pull_via_ostree( sysroot: &SysrootLock, imgref: &ImageReference, quiet: bool, diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 2165247c..97462153 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -23,6 +23,9 @@ pub(crate) mod hostexec; pub(crate) mod journal; mod lsm; pub(crate) mod metadata; +mod ostree_authfile; +mod podman; +mod podman_ostree; mod reboot; mod reexec; mod status; @@ -45,8 +48,6 @@ mod k8sapitypes; mod kernel; #[cfg(feature = "install")] pub(crate) mod mount; -#[cfg(feature = "install")] -mod podman; pub mod spec; #[cfg(feature = "docgen")] diff --git a/lib/src/ostree_authfile.rs b/lib/src/ostree_authfile.rs new file mode 100644 index 00000000..1e10cd36 --- /dev/null +++ b/lib/src/ostree_authfile.rs @@ -0,0 +1,72 @@ +//! # Copy of the ostree authfile bits as they're not public + +use anyhow::Result; +use once_cell::sync::OnceCell; +use ostree_ext::glib; +use std::fs::File; +use std::path::{Path, PathBuf}; + +// https://docs.rs/openat-ext/0.1.10/openat_ext/trait.OpenatDirExt.html#tymethod.open_file_optional +// https://users.rust-lang.org/t/why-i-use-anyhow-error-even-in-libraries/68592 +pub(crate) fn open_optional(path: impl AsRef) -> std::io::Result> { + match std::fs::File::open(path.as_ref()) { + Ok(r) => Ok(Some(r)), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(e) => Err(e), + } +} + +struct ConfigPaths { + persistent: PathBuf, + runtime: PathBuf, +} + +/// Get the runtime and persistent config directories. 
In the system (root) case, these +/// system(root) case: /run/ostree /etc/ostree +/// user(nonroot) case: /run/user/$uid/ostree ~/.config/ostree +fn get_config_paths() -> &'static ConfigPaths { + static PATHS: OnceCell = OnceCell::new(); + PATHS.get_or_init(|| { + let mut r = if rustix::process::getuid() == rustix::process::Uid::ROOT { + ConfigPaths { + persistent: PathBuf::from("/etc"), + runtime: PathBuf::from("/run"), + } + } else { + ConfigPaths { + persistent: glib::user_config_dir(), + runtime: glib::user_runtime_dir(), + } + }; + let path = "ostree"; + r.persistent.push(path); + r.runtime.push(path); + r + }) +} + +impl ConfigPaths { + /// Return the path and an open fd for a config file, if it exists. + pub(crate) fn open_file(&self, p: impl AsRef) -> Result> { + let p = p.as_ref(); + let mut runtime = self.runtime.clone(); + runtime.push(p); + if let Some(f) = open_optional(&runtime)? { + return Ok(Some((runtime, f))); + } + let mut persistent = self.persistent.clone(); + persistent.push(p); + if let Some(f) = open_optional(&persistent)? { + return Ok(Some((persistent, f))); + } + Ok(None) + } +} + +/// Return the path to the global container authentication file, if it exists. +pub(crate) fn get_global_authfile_path() -> Result> { + let paths = get_config_paths(); + let r = paths.open_file("auth.json")?; + // TODO pass the file descriptor to the proxy, not a global path + Ok(r.map(|v| v.0)) +} diff --git a/lib/src/podman.rs b/lib/src/podman.rs index 24b659c0..90cbc4fb 100644 --- a/lib/src/podman.rs +++ b/lib/src/podman.rs @@ -1,12 +1,152 @@ +//! # Helpers for interacting with podman +//! +//! Wrapper for podman which writes to a bootc-owned root. 
+ +use std::os::unix::process::CommandExt; +use std::path::Path; + use anyhow::{anyhow, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use cap_std_ext::cap_std; +use cap_std_ext::cap_std::fs::Dir; +use ostree_ext::container::OstreeImageReference; use serde::Deserialize; +use tokio::process::Command; use crate::hostexec::run_in_host_mountns; +use crate::ostree_authfile; +use crate::spec::ImageReference; use crate::task::Task; +use crate::utils::{cmd_in_root, newline_trim_vec_to_string}; /// Where we look inside our container to find our own image /// for use with `bootc install`. pub(crate) const CONTAINER_STORAGE: &str = "/var/lib/containers"; +/// The argument for podman --root, in parallel to `ostree/repo`. +pub(crate) const STORAGE_ROOT: &str = "ostree/container-storage"; +/// The argument for podman --runroot, this is stored under /run/bootc. +pub(crate) const RUN_ROOT: &str = "run/bootc/container-storage"; +const PODMAN_ARGS: &[&str] = &["--root", STORAGE_ROOT, "--runroot", RUN_ROOT]; + +pub(crate) fn podman_in_root(rootfs: &Dir) -> Result { + let mut cmd = cmd_in_root(rootfs, "podman")?; + cmd.args(PODMAN_ARGS); + Ok(cmd) +} + +pub(crate) async fn temporary_container_for_image(rootfs: &Dir, imageid: &str) -> Result { + tracing::debug!("Creating temporary container for {imageid}"); + let st = podman_in_root(rootfs)? + .args(["create", imageid]) + .output() + .await?; + if !st.status.success() { + anyhow::bail!("Failed to create transient image: {st:?}"); + } + Ok(newline_trim_vec_to_string(st.stdout)?) +} + +pub(crate) async fn podman_mount(rootfs: &Dir, cid: &str) -> Result { + tracing::debug!("Mounting {cid}"); + let st = podman_in_root(rootfs)? 
+ .args(["mount", cid]) + .output() + .await?; + if !st.status.success() { + anyhow::bail!("Failed to mount transient image: {st:?}"); + } + Ok(newline_trim_vec_to_string(st.stdout)?.into()) +} + +pub(crate) async fn podman_pull( + rootfs: &Dir, + image: &ImageReference, + quiet: bool, +) -> Result { + let authfile = ostree_authfile::get_global_authfile_path()?; + let mut cmd = podman_in_root(rootfs)?; + let image = OstreeImageReference::from(image.clone()); + let pull_spec_image = image.imgref.to_string(); + tracing::debug!("Pulling {pull_spec_image}"); + let child = cmd + .args(["pull"]) + .args(authfile.iter().flat_map(|v| [Path::new("--authfile"), v])) + .args(quiet.then_some("--quiet")) + .arg(&pull_spec_image) + .stdout(std::process::Stdio::piped()) + .spawn()?; + let output = child.wait_with_output().await?; + if !output.status.success() { + anyhow::bail!("Failed to pull: {:?}", output.status); + } + Ok(newline_trim_vec_to_string(output.stdout)?.into()) +} + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct PodmanInspect { + #[allow(dead_code)] + pub(crate) id: String, + pub(crate) digest: String, + pub(crate) created: Option>, + pub(crate) config: PodmanInspectConfig, + #[serde(rename = "RootFS")] + #[allow(dead_code)] + pub(crate) root_fs: PodmanInspectRootfs, + pub(crate) graph_driver: PodmanInspectGraphDriver, +} + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct PodmanInspectConfig { + #[serde(default)] + pub(crate) labels: std::collections::BTreeMap, +} + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct PodmanInspectGraphDriver { + pub(crate) name: String, + pub(crate) data: PodmanInspectGraphDriverData, +} + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct PodmanInspectGraphDriverData { + pub(crate) lower_dir: String, + pub(crate) upper_dir: String, +} + +impl PodmanInspectGraphDriverData { + pub(crate) fn layers(&self) -> impl 
Iterator { + self.lower_dir + .split(':') + .chain(std::iter::once(self.upper_dir.as_str())) + } +} + +#[derive(Deserialize)] +#[serde(rename_all = "PascalCase")] +pub(crate) struct PodmanInspectRootfs { + #[allow(dead_code)] + pub(crate) layers: Vec, +} + +pub(crate) async fn podman_inspect(rootfs: &Dir, imgid: &str) -> Result { + let st = podman_in_root(rootfs)? + .args(["image", "inspect", imgid]) + .output() + .await?; + if !st.status.success() { + anyhow::bail!("Failed to mount transient image: {st:?}"); + } + let r: Vec = serde_json::from_slice(&st.stdout)?; + let r = r + .into_iter() + .next() + .ok_or_else(|| anyhow!("Missing output from inspect"))?; + Ok(r) +} #[derive(Deserialize)] #[serde(rename_all = "PascalCase")] @@ -27,3 +167,11 @@ pub(crate) fn imageid_to_digest(imgid: &str) -> Result { .ok_or_else(|| anyhow!("No images returned for inspect"))?; Ok(i.digest) } + +pub(crate) fn exec(root: &Utf8Path, args: &[std::ffi::OsString]) -> Result<()> { + let rootfs = &Dir::open_ambient_dir(root, cap_std::ambient_authority())?; + let mut cmd = crate::utils::sync_cmd_in_root(rootfs, "podman")?; + cmd.args(PODMAN_ARGS); + cmd.args(args); + Err(anyhow::Error::msg(cmd.exec())) +} diff --git a/lib/src/podman_ostree.rs b/lib/src/podman_ostree.rs new file mode 100644 index 00000000..251fd747 --- /dev/null +++ b/lib/src/podman_ostree.rs @@ -0,0 +1,296 @@ +//! # Mapping between podman/containers-storage: and ostree +//! +//! The common container storage model is to store blobs (layers) as unpacked directories, +//! and use the Linux `overlayfs` to merge them dynamically. +//! +//! However, today the `ostree-prepare-root` model as used by ostree expects a final flattened +//! filesystem tree; and crucially we need to perform SELinux labeling. At the moment, because +//! ostree again works on just a plain directory, we need to "physically" change the on-disk +//! xattrs of the target files. +//! +//! 
That said, there is work in ostree to use composefs, which will add a huge amount of flexibility; +//! we can generate an erofs blob dynamically with the target labels. +//! +//! Even more than that however the ostree core currently expects an ostree commit object to be backing +//! the filesystem tree; this is how it handles garbage collection, inspects metadata, etc. Parts +//! of bootc rely on this too today. +//! +//! ## Disadvantages +//! +//! One notable disadvantage of this model is that we're storing file *references* twice, +//! which means the ostree deduplication is pointless. In theory this is fixable by going back +//! and changing the containers-storage files, but... +//! +//! ## Medium term: Unify containers-storage and ostree with composefs +//! +//! Ultimately the best fix is https://github.com/containers/composefs/issues/125 + +use std::cell::OnceCell; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicI64, Ordering}; +use std::sync::Arc; + +use anyhow::{Context, Result}; + +use cap_std::fs::Dir; +use cap_std::fs::{DirBuilder, DirEntry}; +use cap_std::io_lifetimes::AsFilelike; +use cap_std_ext::cap_tempfile::{TempDir, TempFile}; +use cap_std_ext::cmdext::CapStdExtCommandExt; +use cap_std_ext::dirext::CapStdExtDirExt; +use cap_std_ext::{ + cap_primitives::{ + self, + fs::{DirBuilderExt, MetadataExt, OpenOptionsExt}, + }, + cap_std, +}; +use fn_error_context::context; +use ostree_ext::sysroot::SysrootLock; +use rustix::fd::AsFd; + +use crate::deploy::ImageState; +use crate::podman::PodmanInspectGraphDriver; +use crate::utils::sync_cmd_in_root; + +const OSTREE_CONTAINER_IMAGE_REF_PREFIX: &str = "ostree-container/image"; + +fn image_commit_ostree_ref(imageid: &str) -> String { + format!("{OSTREE_CONTAINER_IMAGE_REF_PREFIX}/{imageid}") +} + +struct MergeState<'a> { + trash: &'a Dir, + // Unique integer for naming trashed files + trashid: AtomicI64, + can_clone: bool, +} + +/// Given one directory entry, perform an overlayfs-style merge 
operation. +fn merge_one_entry( + layer: &Dir, + elt: DirEntry, + pathbuf: &mut std::path::PathBuf, + output: &Dir, + state: &MergeState, +) -> Result<()> { + let name = elt.file_name(); + // We operate on a shared path buffer for improved efficiency. + // Here, we append the name of the target file. + pathbuf.push(&name); + let src_meta = elt.metadata()?; + let inum = src_meta.ino(); + let src_ftype = src_meta.file_type(); + + // Helper closure which lazily initializes a "layer trash directory" and moves the target path into it. + let move_to_trash = |src: &Path| -> anyhow::Result<()> { + let id = state.trashid.fetch_add(1, Ordering::SeqCst); + let tempname = format!("t{:X}-{:X}", id, inum); + output + .rename(src, state.trash, &tempname) + .with_context(|| format!("Moving {src:?} to trash"))?; + Ok(()) + }; + + let target_meta = output + .symlink_metadata_optional(&pathbuf) + .context("Querying target")?; + if src_ftype.is_dir() { + // The source layer type is a directory. Check if we need to create it. + let mut needs_create = true; + if let Some(target_meta) = target_meta { + if target_meta.is_dir() { + needs_create = false; + } else { + // The target exists and is not a directory. Trash it. + move_to_trash(&pathbuf)?; + } + } + // Create the directory if needed. + if needs_create { + let mut db = DirBuilder::new(); + db.mode(src_meta.mode()); + output + .create_dir_with(&pathbuf, &db) + .with_context(|| format!("Creating {pathbuf:?}"))?; + } + // Now recurse + merge_layer(layer, pathbuf, output, state)?; + } else if (src_meta.mode() & libc::S_IFMT) == libc::S_IFCHR && src_meta.rdev() == 0 { + // The layer specifies a whiteout entry; remove the target path. + if target_meta.is_some() { + move_to_trash(&pathbuf)?; + } + } else { + // We're operating on a non-directory. In this case if the target exists, + // it needs to be removed. 
+ if target_meta.is_some() { + move_to_trash(&pathbuf)?; + } + if src_meta.is_symlink() { + let target = + cap_primitives::fs::read_link_contents(&layer.as_filelike_view(), &pathbuf) + .with_context(|| format!("Reading link {pathbuf:?}"))?; + cap_primitives::fs::symlink_contents(target, &output.as_filelike_view(), &pathbuf) + .with_context(|| format!("Writing symlink {pathbuf:?}"))?; + } else { + let src = layer + .open(&pathbuf) + .with_context(|| format!("Opening src {pathbuf:?}"))?; + // Use reflinks if available, otherwise we can fall back to hard linking. The hardlink + // count will "leak" into any containers spawned (until podman learns to use composefs). + if state.can_clone { + let mut openopts = cap_std::fs::OpenOptions::new(); + openopts.write(true); + openopts.create_new(true); + openopts.mode(src_meta.mode()); + let dest = output + .open_with(&pathbuf, &openopts) + .with_context(|| format!("Opening dest {pathbuf:?}"))?; + rustix::fs::ioctl_ficlone(dest.as_fd(), src.as_fd()).context("Cloning")?; + } else { + layer + .hard_link(&pathbuf, output, &pathbuf) + .context("Hard linking")?; + } + } + } + assert!(pathbuf.pop()); + Ok(()) +} + +/// This function is an "eager" implementation of computing the filesystem tree, implementing +/// the same algorithm as overlayfs, including processing whiteouts. +fn merge_layer( + layer: &Dir, + pathbuf: &mut std::path::PathBuf, + output: &Dir, + state: &MergeState, +) -> Result<()> { + for elt in layer.read_dir(&pathbuf)? 
{ + let elt = elt?; + merge_one_entry(layer, elt, pathbuf, output, state)?; + } + Ok(()) +} + +#[context("Squashing to tempdir")] +async fn generate_squashed_dir( + rootfs: &Dir, + graph: PodmanInspectGraphDriver, +) -> Result { + let ostree_tmp = &rootfs.open_dir("ostree/repo/tmp")?; + let td = TempDir::new_in(ostree_tmp)?; + // We put files/directories which should be deleted here; they're processed asynchronously + let trashdir = TempDir::new_in(ostree_tmp)?; + anyhow::ensure!(graph.name == "overlay"); + let rootfs = rootfs.try_clone()?; + let td = tokio::task::spawn_blocking(move || { + let can_clone = OnceCell::::new(); + for layer in graph.data.layers() { + // TODO: Does this actually work when operating on a non-default root? + let layer = layer.trim_start_matches('/'); + tracing::debug!("Merging layer: {layer}"); + let layer = rootfs + .open_dir(layer) + .with_context(|| format!("Opening {layer}"))?; + // Determine if we can do reflinks + if can_clone.get().is_none() { + let src = TempFile::new(&layer)?; + let dest = TempFile::new(&td)?; + let did_clone = + rustix::fs::ioctl_ficlone(dest.as_file().as_fd(), src.as_file().as_fd()) + .is_ok(); + can_clone.get_or_init(|| did_clone); + } + let mut pathbuf = PathBuf::from("."); + let mergestate = MergeState { + trash: &trashdir, + trashid: Default::default(), + can_clone: *can_clone.get().unwrap(), + }; + merge_layer(&layer, &mut pathbuf, &td, &mergestate)?; + } + anyhow::Ok(td) + }) + .await??; + Ok(td) +} + +/// Post-process target directory +pub(crate) fn prepare_squashed_root(rootfs: &Dir) -> Result<()> { + if rootfs.exists("etc") { + rootfs + .rename("etc", rootfs, "usr/etc") + .context("Renaming etc => usr/etc")?; + } + // And move everything in /var to the "factory" directory so it can be processed + // by tmpfiles.d + if let Some(ref var) = rootfs.open_dir_optional("var")? 
{ + let factory_var_path = "usr/share/factory/var"; + rootfs.create_dir_all(factory_var_path)?; + let factory_var = &rootfs.open_dir(factory_var_path)?; + for ent in var.entries()? { + let ent = ent?; + let name = ent.file_name(); + var.rename(&name, factory_var, &name) + .with_context(|| format!("Moving var/{name:?} to {factory_var_path}"))?; + } + } + Ok(()) +} + +/// Given an image in containers-storage, generate an ostree commit from it +pub(crate) async fn commit_image_to_ostree( + sysroot: &SysrootLock, + imageid: &str, +) -> Result { + let rootfs = &Dir::reopen_dir(&crate::utils::sysroot_fd_borrowed(sysroot))?; + + // Mount the merged filesystem (via overlayfs) basically just so we can get the final + // SELinux policy in /etc/selinux which we need to compute the labels + let cid = crate::podman::temporary_container_for_image(rootfs, imageid).await?; + let mount_path = &crate::podman::podman_mount(rootfs, &cid).await?; + // Gather metadata on the image, including its constitutent layers + let mut inspect = crate::podman::podman_inspect(rootfs, imageid).await?; + let manifest_digest = inspect.digest; + + // Merge the layers into one final filesystem tree + let squashed = generate_squashed_dir(rootfs, inspect.graph_driver).await?; + // Post-process the merged tree + let squashed = tokio::task::spawn_blocking(move || { + prepare_squashed_root(&squashed)?; + anyhow::Ok(squashed) + }) + .await??; + + tracing::debug!("Writing ostree commit"); + let repo_fd = Arc::new(sysroot.repo().dfd_borrow().try_clone_to_owned()?); + let ostree_ref = image_commit_ostree_ref(imageid); + let mut cmd = sync_cmd_in_root(&squashed, "ostree")?; + cmd.args([ + "--repo=/proc/self/fd/3", + "commit", + "--consume", + "--selinux-policy", + mount_path.as_str(), + "--branch", + ostree_ref.as_str(), + "--tree=dir=.", + ]); + cmd.take_fd_n(repo_fd, 3); + let mut cmd = tokio::process::Command::from(cmd); + cmd.kill_on_drop(true); + let st = cmd.status().await?; + if !st.success() { + 
anyhow::bail!("Failed to ostree commit: {st:?}") + } + let ostree_commit = sysroot.repo().require_rev(&ostree_ref)?.to_string(); + Ok(ImageState { + backend: crate::spec::Backend::Container, + created: inspect.created, + manifest_digest, + version: inspect.config.labels.remove("version"), + ostree_commit, + }) +} diff --git a/lib/src/spec.rs b/lib/src/spec.rs index 6de96390..7f7156fb 100644 --- a/lib/src/spec.rs +++ b/lib/src/spec.rs @@ -28,12 +28,33 @@ pub struct Host { pub status: HostStatus, } +#[derive( + clap::ValueEnum, Serialize, Deserialize, Copy, Clone, Debug, PartialEq, Eq, JsonSchema, +)] +#[serde(rename_all = "camelCase")] +/// The storage backend +pub enum Backend { + /// Use the ostree-container storage backend. + OstreeContainer, + /// Use containers-storage: backend + Container, +} + +impl Default for Backend { + fn default() -> Self { + Self::OstreeContainer + } +} + #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase")] /// The host specification pub struct HostSpec { /// The host image pub image: Option, + /// The storage backend + #[serde(default)] + pub backend: Backend, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)] @@ -97,6 +118,9 @@ pub struct BootEntry { pub incompatible: bool, /// Whether this entry will be subject to garbage collection pub pinned: bool, + /// The backend for this boot entry + #[serde(default)] + pub backend: Backend, /// If this boot entry is ostree based, the corresponding state pub ostree: Option, } diff --git a/lib/src/status.rs b/lib/src/status.rs index dba4889f..a7bf1a71 100644 --- a/lib/src/status.rs +++ b/lib/src/status.rs @@ -1,9 +1,11 @@ use std::collections::VecDeque; -use crate::spec::{BootEntry, Host, HostSpec, HostStatus, HostType, ImageStatus}; +use crate::deploy::ImageState; +use crate::spec::{Backend, BootEntry, Host, HostSpec, HostStatus, HostType, ImageStatus}; use crate::spec::{ImageReference, ImageSignature}; use 
anyhow::{Context, Result}; use camino::Utf8Path; +use clap::ValueEnum; use fn_error_context::context; use ostree::glib; use ostree_container::OstreeImageReference; @@ -107,6 +109,16 @@ pub(crate) fn try_deserialize_timestamp(t: &str) -> Option Result { + let r = origin + .optional_string("bootc", "backend")? + .map(|v| Backend::from_str(&v, true)) + .transpose() + .map_err(anyhow::Error::msg)? + .unwrap_or_default(); + Ok(r) +} + pub(crate) fn labels_of_config( config: &oci_spec::image::ImageConfiguration, ) -> Option<&std::collections::HashMap> { @@ -143,36 +155,53 @@ fn boot_entry_from_deployment( deployment: &ostree::Deployment, ) -> Result { let repo = &sysroot.repo(); - let (image, cached_update, incompatible) = if let Some(origin) = deployment.origin().as_ref() { - let incompatible = crate::utils::origin_has_rpmostree_stuff(origin); - let (image, cached) = if incompatible { - // If there are local changes, we can't represent it as a bootc compatible image. - (None, None) - } else if let Some(image) = get_image_origin(origin)? { - let image = ImageReference::from(image); - let csum = deployment.csum(); - let imgstate = ostree_container::store::query_image_commit(repo, &csum)?; - let cached = imgstate.cached_update.map(|cached| { - create_imagestatus(image.clone(), &cached.manifest_digest, &cached.config) - }); - let imagestatus = - create_imagestatus(image, &imgstate.manifest_digest, &imgstate.configuration); - // We found a container-image based deployment - (Some(imagestatus), cached) + let (image, cached_update, incompatible, backend) = + if let Some(origin) = deployment.origin().as_ref() { + let incompatible = crate::utils::origin_has_rpmostree_stuff(origin); + let backend = get_image_backend(origin)?; + let (image, cached) = if incompatible { + // If there are local changes, we can't represent it as a bootc compatible image. + (None, None) + } else if let Some(image) = get_image_origin(origin)? 
{ + let image = ImageReference::from(image); + let csum = deployment.csum(); + let imgstate = match backend { + Backend::Container => { + todo!() + } + Backend::OstreeContainer => { + ImageState::from(*ostree_container::store::query_image_commit(repo, &csum)?) + } + }; + //let cached = imgstate.cached_update.map(|cached| { + // create_imagestatus(image.clone(), &cached.manifest_digest, &cached.config) + //}); + let cached = None; + + ( + Some(ImageStatus { + image, + version: imgstate.version, + timestamp: imgstate.created, + image_digest: imgstate.manifest_digest, + }), + cached, + ) + } else { + // The deployment isn't using a container image + (None, None) + }; + (image, cached, incompatible, get_image_backend(origin)?) } else { - // The deployment isn't using a container image - (None, None) + // The deployment has no origin at all (this generally shouldn't happen) + (None, None, false, Default::default()) }; - (image, cached, incompatible) - } else { - // The deployment has no origin at all (this generally shouldn't happen) - (None, None, false) - }; let r = BootEntry { image, cached_update, incompatible, pinned: deployment.is_pinned(), + backend, ostree: Some(crate::spec::BootEntryOstree { checksum: deployment.csum().into(), // SAFETY: The deployserial is really unsigned @@ -259,9 +288,16 @@ pub(crate) fn get_status( let spec = staged .as_ref() .or(booted.as_ref()) - .and_then(|entry| entry.image.as_ref()) - .map(|img| HostSpec { - image: Some(img.image.clone()), + .and_then(|entry| { + let image = entry.image.as_ref(); + if let Some(image) = image { + Some(HostSpec { + image: Some(image.image.clone()), + backend: entry.backend.clone(), + }) + } else { + None + } }) .unwrap_or_default(); diff --git a/lib/src/utils.rs b/lib/src/utils.rs index 5daf76db..dc9b7bf4 100644 --- a/lib/src/utils.rs +++ b/lib/src/utils.rs @@ -1,6 +1,8 @@ +use std::os::fd::BorrowedFd; use std::process::Command; use anyhow::{Context, Result}; +use cap_std_ext::{cap_std::fs::Dir, 
cmdext::CapStdExtCommandExt}; use ostree::glib; use ostree_ext::container::SignatureSource; use ostree_ext::ostree; @@ -75,6 +77,46 @@ pub(crate) fn sigpolicy_from_opts( } } +#[allow(unsafe_code)] +pub(crate) fn sysroot_fd_borrowed(sysroot: &ostree_ext::ostree::Sysroot) -> BorrowedFd { + // SAFETY: Just borrowing an existing fd; there's already a PR to add this + // api to libostree + unsafe { BorrowedFd::borrow_raw(sysroot.fd()) } +} + +#[allow(unsafe_code)] +fn set_pdeathsig(cmd: &mut std::process::Command) { + use std::os::unix::process::CommandExt; + // SAFETY: This is a straightforward use of prctl; would be good + // to put in a crate (maybe cap-std-ext) + unsafe { + cmd.pre_exec(|| { + rustix::process::set_parent_process_death_signal(Some(rustix::process::Signal::Term)) + .map_err(Into::into) + }); + } +} + +/// Create a Command instance that has its current working directory set +/// to the target root, and is also lifecycle-bound to us. +pub(crate) fn sync_cmd_in_root(rootfs: &Dir, cmd: &str) -> Result { + let mut cmd = std::process::Command::new(cmd); + cmd.cwd_dir(rootfs.try_clone()?); + set_pdeathsig(&mut cmd); + Ok(cmd) +} + +/// Create a Command instance that has its current working directory set +/// to the target root, and is also lifecycle-bound to us. +pub(crate) fn cmd_in_root(rootfs: &Dir, cmd: &str) -> Result { + let mut cmd = std::process::Command::new(cmd); + cmd.cwd_dir(rootfs.try_clone()?); + set_pdeathsig(&mut cmd); + let mut cmd = tokio::process::Command::from(cmd); + cmd.kill_on_drop(true); + Ok(cmd) +} + /// Output a warning message that we want to be quite visible. /// The process (thread) execution will be delayed for a short time.
pub(crate) fn medium_visibility_warning(s: &str) { @@ -87,6 +129,15 @@ pub(crate) fn medium_visibility_warning(s: &str) { std::thread::sleep(std::time::Duration::from_secs(1)); } +pub(crate) fn newline_trim_vec_to_string(mut v: Vec) -> Result { + let mut i = v.len(); + while i > 0 && v[i - 1] == b'\n' { + i -= 1; + } + v.truncate(i); + String::from_utf8(v).map_err(Into::into) +} + /// Given a possibly tagged image like quay.io/foo/bar:latest and a digest 0ab32..., return /// the digested form quay.io/foo/bar:latest@sha256:0ab32... /// If the image already has a digest, it will be replaced. @@ -96,6 +147,21 @@ pub(crate) fn digested_pullspec(image: &str, digest: &str) -> String { format!("{image}@{digest}") } +#[allow(dead_code)] +pub(crate) fn require_sha256_digest(blobid: &str) -> Result<&str> { + let r = blobid + .split_once("sha256:") + .ok_or_else(|| anyhow::anyhow!("Missing sha256: in blob ID: {blobid}"))? + .1; + if r.len() != 64 { + anyhow::bail!("Invalid digest in blob ID: {blobid}"); + } + if !r.chars().all(|c| char::is_ascii_alphanumeric(&c)) { + anyhow::bail!("Invalid checksum in blob ID: {blobid}"); + } + Ok(r) +} + #[test] fn test_digested_pullspec() { let digest = "ebe3bdccc041864e5a485f1e755e242535c3b83d110c0357fe57f110b73b143e"; @@ -140,3 +206,31 @@ fn test_sigpolicy_from_opts() { SignatureSource::ContainerPolicyAllowInsecure ); } + +#[test] +fn test_newline_trim() { + let ident_cases = ["", "foo"].into_iter().map(|s| s.as_bytes()); + for case in ident_cases { + let r = newline_trim_vec_to_string(Vec::from(case)).unwrap(); + assert_eq!(case, r.as_bytes()); + } + let cases = [("foo\n", "foo"), ("bar\n\n", "bar")]; + for (orig, new) in cases { + let r = newline_trim_vec_to_string(Vec::from(orig)).unwrap(); + assert_eq!(new.as_bytes(), r.as_bytes()); + } +} + +#[test] +fn test_require_sha256_digest() { + assert_eq!( + require_sha256_digest( + "sha256:0b145899261c8a62406f697c67040cbd811f4dfaa9d778426cf1953413be8534" + ) + .unwrap(), + 
"0b145899261c8a62406f697c67040cbd811f4dfaa9d778426cf1953413be8534" + ); + for e in ["", "sha256:abcde", "sha256:0b145899261c8a62406f697c67040cbd811f4dfaa9d778426cf1953413b34🦀123", "sha512:9895de267ca908c36ed0031c017ba9bf85b83c21ff2bf241766a4037be81f947c68841ee75f003eba3b4bddc524c0357d7bc9ebffe499f5b72f2da3507cb170d"] { + assert!(require_sha256_digest(e).is_err()); + } +}