diff --git a/library/std/src/os/linux/mod.rs b/library/std/src/os/linux/mod.rs index f179a524336fc..f7bb63df0c1ff 100644 --- a/library/std/src/os/linux/mod.rs +++ b/library/std/src/os/linux/mod.rs @@ -3,4 +3,5 @@ #![stable(feature = "raw_ext", since = "1.1.0")] pub mod fs; +pub mod process; pub mod raw; diff --git a/library/std/src/os/linux/process.rs b/library/std/src/os/linux/process.rs new file mode 100644 index 0000000000000..661d3cef7a03a --- /dev/null +++ b/library/std/src/os/linux/process.rs @@ -0,0 +1,47 @@ +//! Linux-specific extensions to primitives in the `std::process` module. + +#![unstable(feature = "linux_pidfd", issue = "none")] + +use crate::process; +use crate::sys_common::AsInnerMut; +use crate::io::Result; + +/// Os-specific extensions to [`process::Child`] +/// +/// [`process::Child`]: crate::process::Child +pub trait ChildExt { + /// Obtains the pidfd created for this child process, if available. + /// + /// A pidfd will only ever be available if `create_pidfd(true)` was called + /// when the corresponding `Command` was created. + /// + /// Even if `create_pidfd(true)` is called, a pidfd may not be available + /// due to an older version of Linux being in use, or if + /// some other error occured. + /// + /// See `man pidfd_open` for more details about pidfds. + fn pidfd(&self) -> Result; +} + +/// Os-specific extensions to [`process::Command`] +/// +/// [`process::Command`]: crate::process::Command +pub trait CommandExt { + /// Sets whether or this `Command` will attempt to create a pidfd + /// for the child. If this method is never called, a pidfd will + /// not be crated. + /// + /// The pidfd can be retrieved from the child via [`ChildExt::pidfd`] + /// + /// A pidfd will only be created if it is possible to do so + /// in a guaranteed race-free manner (e.g. if the `clone3` system call is + /// supported). Otherwise, [`ChildExit::pidfd`] will return an error. + fn create_pidfd(&mut self, val: bool) -> &mut process::Command; +} + +impl CommandExt for process::Command { + fn create_pidfd(&mut self, val: bool) -> &mut process::Command { + self.as_inner_mut().create_pidfd(val); + self + } +} diff --git a/library/std/src/process.rs b/library/std/src/process.rs index 3d238b7f764ef..03ac415b12a31 100644 --- a/library/std/src/process.rs +++ b/library/std/src/process.rs @@ -161,7 +161,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; /// [`wait`]: Child::wait #[stable(feature = "process", since = "1.0.0")] pub struct Child { - handle: imp::Process, + pub(crate) handle: imp::Process, /// The handle for writing to the child's standard input (stdin), if it has /// been captured. To avoid partially moving diff --git a/library/std/src/sys/unix/process/process_common.rs b/library/std/src/sys/unix/process/process_common.rs index 9ddd4ad4000ef..9f201541c5f4d 100644 --- a/library/std/src/sys/unix/process/process_common.rs +++ b/library/std/src/sys/unix/process/process_common.rs @@ -88,6 +88,7 @@ pub struct Command { stdin: Option, stdout: Option, stderr: Option, + pub(crate) make_pidfd: bool, } // Create a new type for argv, so that we can make it `Send` and `Sync` @@ -149,6 +150,7 @@ impl Command { stdin: None, stdout: None, stderr: None, + make_pidfd: false, } } @@ -181,6 +183,9 @@ impl Command { pub fn gid(&mut self, id: gid_t) { self.gid = Some(id); } + pub fn create_pidfd(&mut self, val: bool) { + self.make_pidfd = val; + } pub fn saw_nul(&self) -> bool { self.saw_nul diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs index eb600d2465ca0..41773cd883322 100644 --- a/library/std/src/sys/unix/process/process_unix.rs +++ b/library/std/src/sys/unix/process/process_unix.rs @@ -5,6 +5,7 @@ use crate::ptr; use crate::sys; use crate::sys::cvt; use crate::sys::process::process_common::*; +use crate::sync::atomic::{AtomicBool, Ordering}; use libc::{c_int, gid_t, pid_t, uid_t}; @@ -34,18 +35,7 @@ impl Command { let (input, output) = sys::pipe::anon_pipe()?; - // Whatever happens after the fork is almost for sure going to touch or - // look at the environment in one way or another (PATH in `execvp` or - // accessing the `environ` pointer ourselves). Make sure no other thread - // is accessing the environment when we do the fork itself. - // - // Note that as soon as we're done with the fork there's no need to hold - // a lock any more because the parent won't do anything and the child is - // in its own process. - let result = unsafe { - let _env_lock = sys::os::env_lock(); - cvt(libc::fork())? - }; + let (result, pidfd) = self.do_fork()?; let pid = unsafe { match result { @@ -70,11 +60,11 @@ impl Command { rtassert!(output.write(&bytes).is_ok()); libc::_exit(1) } - n => n, + n => n as pid_t, } }; - let mut p = Process { pid, status: None }; + let mut p = Process { pid, status: None, pidfd }; drop(output); let mut bytes = [0; 8]; @@ -107,6 +97,95 @@ impl Command { } } + // Attempts to fork the process. If successful, returns + // Ok((0, -1)) in the child, and Ok((child_pid, child_pidfd)) in the parent. + fn do_fork(&mut self) -> Result<(libc::c_long, libc::pid_t), io::Error> { + // Whatever happens after the fork is almost for sure going to touch or + // look at thbe environment in one way or another (PATH in `execvp` or + // accessing the `environ` pointer ourselves). Make sure no other thread + // is accessing the environment when we do the fork itself. + // + // Note that as soon as we're done with the fork there's no need to hold + // a lock any more because the parent won't do anything and the child is + // in its own process. + let _env_lock = unsafe { sys::os::env_lock() }; + + // If we fail to create a pidfd for any reason, this will + // stay as -1, which indicates an error + let mut pidfd: libc::pid_t = -1; + + // On Linux, attempt to use the `clone3` syscall, which + // supports more argument (in particular, the ability to create a pidfd). + // If this fails, we will fall through this block to a call to `fork()` + cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + static HAS_CLONE3: AtomicBool = AtomicBool::new(true); + + const CLONE_PIDFD: u64 = 0x00001000; + + #[repr(C)] + struct clone_args { + flags: u64, + pidfd: u64, + child_tid: u64, + parent_tid: u64, + exit_signal: u64, + stack: u64, + stack_size: u64, + tls: u64, + set_tid: u64, + set_tid_size: u64, + cgroup: u64, + } + + syscall! { + fn clone3(cl_args: *mut clone_args, len: libc::size_t) -> libc::c_long + } + + if HAS_CLONE3.load(Ordering::Relaxed) { + let mut flags = 0; + if self.make_pidfd { + flags |= CLONE_PIDFD; + } + + let mut args = clone_args { + flags, + pidfd: &mut pidfd as *mut libc::pid_t as u64, + child_tid: 0, + parent_tid: 0, + exit_signal: libc::SIGCHLD as u64, + stack: 0, + stack_size: 0, + tls: 0, + set_tid: 0, + set_tid_size: 0, + cgroup: 0 + }; + + let args_ptr = &mut args as *mut clone_args; + let args_size = crate::mem::size_of::(); + + let res = cvt(unsafe { clone3(args_ptr, args_size) }); + match res { + Ok(n) => return Ok((n, pidfd)), + Err(e) => match e.raw_os_error() { + // Multiple threads can race to execute this store, + // but that's fine - that just means that multiple threads + // will have tried and failed to execute the same syscall, + // with no other side effects. + Some(libc::ENOSYS) => HAS_CLONE3.store(false, Ordering::Relaxed), + _ => return Err(e) + } + } + } + } + } + // If we get here, we are either not on Linux, + // or we are on Linux and the 'clone3' syscall does not exist + cvt(unsafe { libc::fork() }.into()).map(|res| (res, pidfd)) + } + + pub fn exec(&mut self, default: Stdio) -> io::Error { let envp = self.capture_env(); @@ -252,8 +331,6 @@ impl Command { #[cfg(not(any( target_os = "macos", target_os = "freebsd", - all(target_os = "linux", target_env = "gnu"), - all(target_os = "linux", target_env = "musl"), )))] fn posix_spawn( &mut self, @@ -268,8 +345,6 @@ impl Command { #[cfg(any( target_os = "macos", target_os = "freebsd", - all(target_os = "linux", target_env = "gnu"), - all(target_os = "linux", target_env = "musl"), ))] fn posix_spawn( &mut self, @@ -404,6 +479,12 @@ impl Command { pub struct Process { pid: pid_t, status: Option, + // On Linux, stores the pidfd created for this child. + // This is -1 if the user did not request pidfd creation, + // or if the pidfd could not be created for some reason + // (e.g. the `clone3` syscall was not available). + #[cfg(target_os = "linux")] + pidfd: libc::c_int, } impl Process { @@ -494,3 +575,15 @@ impl fmt::Display for ExitStatus { } } } + +#[cfg(target_os = "linux")] +#[unstable(feature = "linux_pidfd", issue = "none")] +impl crate::os::linux::process::ChildExt for crate::process::Child { + fn pidfd(&self) -> crate::io::Result { + if self.handle.pidfd > 0 { + Ok(self.handle.pidfd) + } else { + Err(crate::io::Error::from(crate::io::ErrorKind::Other)) + } + } +} diff --git a/src/test/ui/command/command-create-pidfd.rs b/src/test/ui/command/command-create-pidfd.rs new file mode 100644 index 0000000000000..248ae3457d715 --- /dev/null +++ b/src/test/ui/command/command-create-pidfd.rs @@ -0,0 +1,27 @@ +// run-pass +// linux-only - pidfds are a linux-specific concept + +#![feature(linux_pidfd)] +use std::os::linux::process::{CommandExt, ChildExt}; +use std::process::Command; + +fn main() { + // We don't assert the precise value, since the standard libarary + // may be opened other file descriptors before our code ran. + let _ = Command::new("echo") + .create_pidfd(true) + .spawn() + .unwrap() + .pidfd().expect("failed to obtain pidfd"); + + let _ = Command::new("echo") + .create_pidfd(false) + .spawn() + .unwrap() + .pidfd().expect_err("pidfd should not have been created when create_pid(false) is set"); + + let _ = Command::new("echo") + .spawn() + .unwrap() + .pidfd().expect_err("pidfd should not have been created"); +}