From 1462bef65afa47416999ac7f7358937e538c16e1 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Fri, 31 Mar 2023 13:15:16 -0700 Subject: [PATCH] Ensure that sandboxed processes exit before their sandboxes are cleaned up (#18632) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As @jsirois discovered and described in https://github.com/pantsbuild/pants/issues/16778#issuecomment-1491120170, because the `local::CommandRunner` does not `wait` for its child process to have exited, it might be possible for a SIGKILL to not have taken effect on the child process before its sandbox has been torn down. And that could result in the process seeing a partial sandbox (and in the case of #16778, potentially cause named cache corruption). This change ensures that we block to call `wait` when spawning local processes by wrapping them in the `ManagedChild` helper that we were already using for interactive processes. It then attempts to clarify the contract of the `CapturedWorkdir` trait (but in order to improve cherry-pickability does not attempt to adjust it), to make it clear that child processes should fully exit before the `run_and_capture_workdir` method has returned. Fixes #16778 🤞. --------- Co-authored-by: John Sirois --- .../process_execution/docker/src/docker.rs | 4 + .../engine/process_execution/src/children.rs | 83 +++++++++-------- .../engine/process_execution/src/local.rs | 90 +++++-------------- src/rust/engine/src/intrinsics.rs | 6 +- 4 files changed, 80 insertions(+), 103 deletions(-) diff --git a/src/rust/engine/process_execution/docker/src/docker.rs b/src/rust/engine/process_execution/docker/src/docker.rs index 5a539d903db..9679bcc14d5 100644 --- a/src/rust/engine/process_execution/docker/src/docker.rs +++ b/src/rust/engine/process_execution/docker/src/docker.rs @@ -531,6 +531,10 @@ impl<'a> process_execution::CommandRunner for CommandRunner<'a> { impl<'a> CapturedWorkdir for CommandRunner<'a> { type WorkdirToken = (String, String); + // TODO: This method currently violates the `Drop` constraint of `CapturedWorkdir`, because the + // Docker container is not necessarily killed when the returned value is Dropped. + // + // see https://github.com/pantsbuild/pants/issues/18210 async fn run_in_workdir<'s, 'c, 'w, 'r>( &'s self, _context: &'c Context, diff --git a/src/rust/engine/process_execution/src/children.rs b/src/rust/engine/process_execution/src/children.rs index 2e7b4a70ad4..e09e1e6936e 100644 --- a/src/rust/engine/process_execution/src/children.rs +++ b/src/rust/engine/process_execution/src/children.rs @@ -1,7 +1,6 @@ // Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). use std::ops::{Deref, DerefMut}; -use std::sync::atomic::{AtomicBool, Ordering}; use std::{thread, time}; use nix::sys::signal; @@ -14,20 +13,22 @@ const GRACEFUL_SHUTDOWN_POLL_TIME: time::Duration = time::Duration::from_millis( /// A child process running in its own PGID, with a drop implementation that will kill that /// PGID. /// +/// Will optionally attempt a graceful shutdown first using `SIGINT`. +/// /// TODO: If this API is useful, we should consider extending it to parented Nailgun processes /// and to all local execution in general. It could also be adjusted for sending other posix /// signals in sequence for . pub struct ManagedChild { child: Child, - graceful_shutdown_timeout: time::Duration, - killed: AtomicBool, + graceful_shutdown_timeout: Option, + killed: bool, } impl ManagedChild { pub fn spawn( - mut command: Command, - graceful_shutdown_timeout: time::Duration, - ) -> Result { + command: &mut Command, + graceful_shutdown_timeout: Option, + ) -> std::io::Result { // Set `kill_on_drop` to encourage `tokio` to `wait` the process via its own "reaping" // mechanism: // see https://docs.rs/tokio/1.14.0/tokio/process/struct.Command.html#method.kill_on_drop @@ -47,13 +48,11 @@ impl ManagedChild { }; // Then spawn. - let child = command - .spawn() - .map_err(|e| format!("Error executing interactive process: {e}"))?; + let child = command.spawn()?; Ok(Self { child, graceful_shutdown_timeout, - killed: AtomicBool::new(false), + killed: false, }) } @@ -100,49 +99,63 @@ impl ManagedChild { &mut self, max_wait_duration: time::Duration, ) -> Result { + let maybe_id = self.child.id(); let deadline = time::Instant::now() + max_wait_duration; while time::Instant::now() <= deadline { if self.check_child_has_exited()? { return Ok(true); } + log::debug!("Waiting for {:?} to exit...", maybe_id); thread::sleep(GRACEFUL_SHUTDOWN_POLL_TIME); } - // if we get here we have timed-out + // If we get here we have timed-out. Ok(false) } - /// Attempt to gracefully shutdown the process. + /// Attempt to shutdown the process (gracefully, if was configured that way at creation). /// - /// This will send a SIGINT to the process and give it a chance to shutdown gracefully. If the + /// Graceful shutdown will send a SIGINT to the process and give it a chance to exit. If the /// process does not respond to the SIGINT within a fixed interval, a SIGKILL will be sent. /// - /// This method *will* block the current thread but will do so for a bounded amount of time. - pub fn graceful_shutdown_sync(&mut self) -> Result<(), String> { - self.signal_pg(signal::Signal::SIGINT)?; - match self.wait_for_child_exit_sync(self.graceful_shutdown_timeout) { - Ok(true) => { - // process was gracefully shutdown - self.killed.store(true, Ordering::SeqCst); - Ok(()) - } - Ok(false) => { - // we timed out waiting for the child to exit, so we need to kill it. - log::warn!( - "Timed out waiting for graceful shutdown of process group. Will try SIGKILL instead." - ); - self.kill_pgid() - } - Err(e) => { - log::warn!("An error occurred while waiting for graceful shutdown of process group ({}). Will try SIGKILL instead.", e); - self.kill_pgid() + /// NB: This method *will* block the current thread but it will do so for a bounded amount of time, + /// as long as the operating system responds to `SIGKILL` in a bounded amount of time. + /// + /// TODO: Async drop might eventually allow for making this blocking more explicit. + /// + pub fn attempt_shutdown_sync(&mut self) -> Result<(), String> { + if let Some(graceful_shutdown_timeout) = self.graceful_shutdown_timeout { + // If we fail to send SIGINT, then we will also fail to send SIGKILL, so we return eagerly + // on error here. + self.signal_pg(signal::Signal::SIGINT)?; + match self.wait_for_child_exit_sync(graceful_shutdown_timeout) { + Ok(true) => { + // Process was gracefully shutdown: return. + self.killed = true; + return Ok(()); + } + Ok(false) => { + // We timed out waiting for the child to exit, so we need to kill it. + log::warn!( + "Timed out waiting for graceful shutdown of process group. Will try SIGKILL instead." + ); + } + Err(e) => { + log::warn!("An error occurred while waiting for graceful shutdown of process group ({}). Will try SIGKILL instead.", e); + } } } + + self.kill_pgid() } /// Kill the process's unique PGID or return an error if we don't have a PID or cannot kill. fn kill_pgid(&mut self) -> Result<(), String> { self.signal_pg(signal::Signal::SIGKILL)?; - self.killed.store(true, Ordering::SeqCst); + // NB: Since the SIGKILL was successfully delivered above, the only things that could cause the + // child not to eventually exit would be if it had become a zombie (which shouldn't be possible, + // because we are its parent process, and we are still alive). + let _ = self.wait_for_child_exit_sync(time::Duration::from_secs(1800))?; + self.killed = true; Ok(()) } } @@ -164,8 +177,8 @@ impl DerefMut for ManagedChild { /// Implements drop by killing the process group. impl Drop for ManagedChild { fn drop(&mut self) { - if !self.killed.load(Ordering::SeqCst) { - let _ = self.graceful_shutdown_sync(); + if !self.killed { + let _ = self.attempt_shutdown_sync(); } } } diff --git a/src/rust/engine/process_execution/src/local.rs b/src/rust/engine/process_execution/src/local.rs index 16b7133d8fc..7bde6ce71a0 100644 --- a/src/rust/engine/process_execution/src/local.rs +++ b/src/rust/engine/process_execution/src/local.rs @@ -1,7 +1,6 @@ // Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; -use std::ffi::OsStr; use std::fmt::{self, Debug}; use std::io::Write; use std::ops::Neg; @@ -30,14 +29,14 @@ use store::{ }; use task_executor::Executor; use tempfile::TempDir; -use tokio::process::{Child, Command}; +use tokio::process::Command; use tokio::sync::RwLock; use tokio::time::{timeout, Duration}; use tokio_util::codec::{BytesCodec, FramedRead}; use workunit_store::{in_workunit, Level, Metric, RunningWorkunit}; use crate::{ - Context, FallibleProcessResultWithPlatform, NamedCaches, Process, ProcessError, + Context, FallibleProcessResultWithPlatform, ManagedChild, NamedCaches, Process, ProcessError, ProcessResultMetadata, ProcessResultSource, }; @@ -147,66 +146,6 @@ impl Debug for CommandRunner { } } -pub struct HermeticCommand { - inner: Command, -} - -/// -/// A command that accepts no input stream and does not consult the `PATH`. -/// -impl HermeticCommand { - fn new>(program: S) -> HermeticCommand { - let mut inner = Command::new(program); - inner - // TODO: This will not universally prevent child processes continuing to run in the - // background, because killing a pantsd client with Ctrl+C kills the server with a signal, - // which won't currently result in an orderly dropping of everything in the graph. See #10004. - .kill_on_drop(true) - .env_clear() - // It would be really nice not to have to manually set PATH but this is sadly the only way - // to stop automatic PATH searching. - .env("PATH", ""); - HermeticCommand { inner } - } - - fn args(&mut self, args: I) -> &mut HermeticCommand - where - I: IntoIterator, - S: AsRef, - { - self.inner.args(args); - self - } - - fn envs(&mut self, vars: I) -> &mut HermeticCommand - where - I: IntoIterator, - K: AsRef, - V: AsRef, - { - self.inner.envs(vars); - self - } - - fn current_dir>(&mut self, dir: P) -> &mut HermeticCommand { - self.inner.current_dir(dir); - self - } - - fn spawn, E: Into>( - &mut self, - stdout: O, - stderr: E, - ) -> std::io::Result { - self - .inner - .stdin(Stdio::null()) - .stdout(stdout) - .stderr(stderr) - .spawn() - } -} - // TODO: A Stream that ends with `Exit` is error prone: we should consider creating a Child struct // similar to nails::server::Child (which is itself shaped like `std::process::Child`). // See https://github.com/stuhood/nails/issues/1 for more info. @@ -283,6 +222,10 @@ impl super::CommandRunner for CommandRunner { .await?; workunit.increment_counter(Metric::LocalExecutionRequests, 1); + // NB: The constraint on `CapturedWorkdir` is that any child processes spawned here have + // exited (or been killed in their `Drop` handlers), so this function can rely on the usual + // Drop order of local variables to assume that the sandbox is cleaned up after the process + // is. let res = self .run_and_capture_workdir( req.clone(), @@ -348,8 +291,18 @@ impl CapturedWorkdir for CommandRunner { } else { workdir_path.to_owned() }; - let mut command = HermeticCommand::new(&req.argv[0]); - command.args(&req.argv[1..]).current_dir(cwd).envs(&req.env); + let mut command = Command::new(&req.argv[0]); + command + .env_clear() + // It would be really nice not to have to manually set PATH but this is sadly the only way + // to stop automatic PATH searching. + .env("PATH", "") + .args(&req.argv[1..]) + .current_dir(cwd) + .envs(&req.env) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); // See the documentation of the `CapturedWorkdir::run_in_workdir` method, but `exclusive_spawn` // indicates the binary we're spawning was written out by the current thread, and, as such, @@ -368,7 +321,7 @@ impl CapturedWorkdir for CommandRunner { // // See: https://github.com/golang/go/issues/22315 for an excellent description of this generic // unix problem. - let mut fork_exec = move || command.spawn(Stdio::piped(), Stdio::piped()); + let mut fork_exec = move || ManagedChild::spawn(&mut command, None); let mut child = { if exclusive_spawn { let _write_locked = self.spawn_lock.write().await; @@ -573,6 +526,11 @@ pub trait CapturedWorkdir { /// /// Spawn the given process in a working directory prepared with its expected input digest. /// + /// NB: The implementer of this method must guarantee that the spawned process has completely + /// exited when the returned BoxStream is Dropped. Otherwise it might be possible for the process + /// to observe the working directory that it is running in being torn down. In most cases, this + /// requires Drop handlers to synchronously wait for their child processes to exit. + /// /// If the process to be executed has an `argv[0]` that points into its input digest then /// `exclusive_spawn` will be `true` and the spawn implementation should account for the /// possibility of concurrent fork+exec holding open the cloned `argv[0]` file descriptor, which, diff --git a/src/rust/engine/src/intrinsics.rs b/src/rust/engine/src/intrinsics.rs index 47b317a0f4c..f928fbfb6b7 100644 --- a/src/rust/engine/src/intrinsics.rs +++ b/src/rust/engine/src/intrinsics.rs @@ -632,12 +632,14 @@ fn interactive_process( .try_clone_as_file() .map_err(|e| format!("Couldn't clone stderr: {e}"))?, )); - let mut subprocess = ManagedChild::spawn(command, context.core.graceful_shutdown_timeout)?; + let mut subprocess = + ManagedChild::spawn(&mut command, Some(context.core.graceful_shutdown_timeout)) + .map_err(|e| format!("Error executing interactive process: {e}"))?; tokio::select! { _ = session.cancelled() => { // The Session was cancelled: attempt to kill the process group / process, and // then wait for it to exit (to avoid zombies). - if let Err(e) = subprocess.graceful_shutdown_sync() { + if let Err(e) = subprocess.attempt_shutdown_sync() { // Failed to kill the PGID: try the non-group form. log::warn!("Failed to kill spawned process group ({}). Will try killing only the top process.\n\ This is unexpected: please file an issue about this problem at \