diff --git a/library/std/src/os/linux/mod.rs b/library/std/src/os/linux/mod.rs index 94438defc2270..8e7776f6646b5 100644 --- a/library/std/src/os/linux/mod.rs +++ b/library/std/src/os/linux/mod.rs @@ -4,4 +4,5 @@ #![doc(cfg(target_os = "linux"))] pub mod fs; +pub mod process; pub mod raw; diff --git a/library/std/src/os/linux/process.rs b/library/std/src/os/linux/process.rs new file mode 100644 index 0000000000000..91547b8f91622 --- /dev/null +++ b/library/std/src/os/linux/process.rs @@ -0,0 +1,154 @@ +//! Linux-specific extensions to primitives in the `std::process` module. + +#![unstable(feature = "linux_pidfd", issue = "82971")] + +use crate::io::Result; +use crate::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; +use crate::process; +#[cfg(not(doc))] +use crate::sys::fd::FileDesc; +use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; + +#[cfg(doc)] +struct FileDesc; + +/// This type represents a file descriptor that refers to a process. +/// +/// A `PidFd` can be obtained by setting the corresponding option on [`Command`] +/// with [`create_pidfd`]. Subsequently, the created pidfd can be retrieved +/// from the [`Child`] by calling [`pidfd`] or [`take_pidfd`]. +/// +/// Example: +/// ```no_run +/// #![feature(linux_pidfd)] +/// use std::os::linux::process::{CommandExt, ChildExt}; +/// use std::process::Command; +/// +/// let mut child = Command::new("echo") +/// .create_pidfd(true) +/// .spawn() +/// .expect("Failed to spawn child"); +/// +/// let pidfd = child +/// .take_pidfd() +/// .expect("Failed to retrieve pidfd"); +/// +/// // The file descriptor will be closed when `pidfd` is dropped. +/// ``` +/// Refer to the man page of [`pidfd_open(2)`] for further details. +/// +/// [`Command`]: process::Command +/// [`create_pidfd`]: CommandExt::create_pidfd +/// [`Child`]: process::Child +/// [`pidfd`]: fn@ChildExt::pidfd +/// [`take_pidfd`]: ChildExt::take_pidfd +/// [`pidfd_open(2)`]: https://man7.org/linux/man-pages/man2/pidfd_open.2.html +#[derive(Debug)] +pub struct PidFd { + inner: FileDesc, +} + +impl AsInner for PidFd { + fn as_inner(&self) -> &FileDesc { + &self.inner + } +} + +impl FromInner for PidFd { + fn from_inner(inner: FileDesc) -> PidFd { + PidFd { inner } + } +} + +impl IntoInner for PidFd { + fn into_inner(self) -> FileDesc { + self.inner + } +} + +impl AsRawFd for PidFd { + fn as_raw_fd(&self) -> RawFd { + self.as_inner().raw() + } +} + +impl FromRawFd for PidFd { + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Self::from_inner(FileDesc::new(fd)) + } +} + +impl IntoRawFd for PidFd { + fn into_raw_fd(self) -> RawFd { + self.into_inner().into_raw() + } +} + +mod private_child_ext { + pub trait Sealed {} + impl Sealed for crate::process::Child {} +} + +/// Os-specific extensions for [`Child`] +/// +/// [`Child`]: process::Child +pub trait ChildExt: private_child_ext::Sealed { + /// Obtains a reference to the [`PidFd`] created for this [`Child`], if available. + /// + /// A pidfd will only be available if its creation was requested with + /// [`create_pidfd`] when the corresponding [`Command`] was created. + /// + /// Even if requested, a pidfd may not be available due to an older + /// version of Linux being in use, or if some other error occurred. + /// + /// [`Command`]: process::Command + /// [`create_pidfd`]: CommandExt::create_pidfd + /// [`Child`]: process::Child + fn pidfd(&self) -> Result<&PidFd>; + + /// Takes ownership of the [`PidFd`] created for this [`Child`], if available. + /// + /// A pidfd will only be available if its creation was requested with + /// [`create_pidfd`] when the corresponding [`Command`] was created. + /// + /// Even if requested, a pidfd may not be available due to an older + /// version of Linux being in use, or if some other error occurred. + /// + /// [`Command`]: process::Command + /// [`create_pidfd`]: CommandExt::create_pidfd + /// [`Child`]: process::Child + fn take_pidfd(&mut self) -> Result; +} + +mod private_command_ext { + pub trait Sealed {} + impl Sealed for crate::process::Command {} +} + +/// Os-specific extensions for [`Command`] +/// +/// [`Command`]: process::Command +pub trait CommandExt: private_command_ext::Sealed { + /// Sets whether a [`PidFd`](struct@PidFd) should be created for the [`Child`] + /// spawned by this [`Command`]. + /// By default, no pidfd will be created. + /// + /// The pidfd can be retrieved from the child with [`pidfd`] or [`take_pidfd`]. + /// + /// A pidfd will only be created if it is possible to do so + /// in a guaranteed race-free manner (e.g. if the `clone3` system call + /// is supported). Otherwise, [`pidfd`] will return an error. + /// + /// [`Command`]: process::Command + /// [`Child`]: process::Child + /// [`pidfd`]: fn@ChildExt::pidfd + /// [`take_pidfd`]: ChildExt::take_pidfd + fn create_pidfd(&mut self, val: bool) -> &mut process::Command; +} + +impl CommandExt for process::Command { + fn create_pidfd(&mut self, val: bool) -> &mut process::Command { + self.as_inner_mut().create_pidfd(val); + self + } +} diff --git a/library/std/src/process.rs b/library/std/src/process.rs index 11a0432ce27a1..99c3369425b06 100644 --- a/library/std/src/process.rs +++ b/library/std/src/process.rs @@ -166,7 +166,7 @@ use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner}; /// [`wait`]: Child::wait #[stable(feature = "process", since = "1.0.0")] pub struct Child { - handle: imp::Process, + pub(crate) handle: imp::Process, /// The handle for writing to the child's standard input (stdin), if it has /// been captured. To avoid partially moving diff --git a/library/std/src/sys/unix/process/process_common.rs b/library/std/src/sys/unix/process/process_common.rs index c5bdd1bda4a7a..80cae5a3d7980 100644 --- a/library/std/src/sys/unix/process/process_common.rs +++ b/library/std/src/sys/unix/process/process_common.rs @@ -79,6 +79,8 @@ pub struct Command { stdin: Option, stdout: Option, stderr: Option, + #[cfg(target_os = "linux")] + create_pidfd: bool, } // Create a new type for argv, so that we can make it `Send` and `Sync` @@ -124,6 +126,7 @@ pub enum Stdio { } impl Command { + #[cfg(not(target_os = "linux"))] pub fn new(program: &OsStr) -> Command { let mut saw_nul = false; let program = os2c(program, &mut saw_nul); @@ -144,6 +147,28 @@ impl Command { } } + #[cfg(target_os = "linux")] + pub fn new(program: &OsStr) -> Command { + let mut saw_nul = false; + let program = os2c(program, &mut saw_nul); + Command { + argv: Argv(vec![program.as_ptr(), ptr::null()]), + args: vec![program.clone()], + program, + env: Default::default(), + cwd: None, + uid: None, + gid: None, + saw_nul, + closures: Vec::new(), + groups: None, + stdin: None, + stdout: None, + stderr: None, + create_pidfd: false, + } + } + pub fn set_arg_0(&mut self, arg: &OsStr) { // Set a new arg0 let arg = os2c(arg, &mut self.saw_nul); @@ -177,6 +202,22 @@ impl Command { self.groups = Some(Box::from(groups)); } + #[cfg(target_os = "linux")] + pub fn create_pidfd(&mut self, val: bool) { + self.create_pidfd = val; + } + + #[cfg(not(target_os = "linux"))] + #[allow(dead_code)] + pub fn get_create_pidfd(&self) -> bool { + false + } + + #[cfg(target_os = "linux")] + pub fn get_create_pidfd(&self) -> bool { + self.create_pidfd + } + pub fn saw_nul(&self) -> bool { self.saw_nul } diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs index c888dd0d87d8e..4b210d6af1303 100644 --- a/library/std/src/sys/unix/process/process_unix.rs +++ b/library/std/src/sys/unix/process/process_unix.rs @@ -9,6 +9,12 @@ use crate::sys; use crate::sys::cvt; use crate::sys::process::process_common::*; +#[cfg(target_os = "linux")] +use crate::os::linux::process::PidFd; + +#[cfg(target_os = "linux")] +use crate::sys::weak::syscall; + #[cfg(any( target_os = "macos", target_os = "freebsd", @@ -61,7 +67,8 @@ impl Command { // a lock any more because the parent won't do anything and the child is // in its own process. Thus the parent drops the lock guard while the child // forgets it to avoid unlocking it on a new thread, which would be invalid. - let (env_lock, pid) = unsafe { (sys::os::env_read_lock(), cvt(libc::fork())?) }; + let env_lock = sys::os::env_read_lock(); + let (pid, pidfd) = unsafe { self.do_fork()? }; if pid == 0 { crate::panic::always_abort(); @@ -90,7 +97,7 @@ impl Command { drop(env_lock); drop(output); - let mut p = Process { pid, status: None }; + let mut p = Process::new(pid, pidfd); let mut bytes = [0; 8]; // loop to handle EINTR @@ -122,6 +129,92 @@ impl Command { } } + // Attempts to fork the process. If successful, returns Ok((0, -1)) + // in the child, and Ok((child_pid, -1)) in the parent. + #[cfg(not(target_os = "linux"))] + unsafe fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> { + cvt(libc::fork()).map(|res| (res, -1)) + } + + // Attempts to fork the process. If successful, returns Ok((0, -1)) + // in the child, and Ok((child_pid, child_pidfd)) in the parent. + #[cfg(target_os = "linux")] + unsafe fn do_fork(&mut self) -> Result<(pid_t, pid_t), io::Error> { + use crate::sync::atomic::{AtomicBool, Ordering}; + + static HAS_CLONE3: AtomicBool = AtomicBool::new(true); + const CLONE_PIDFD: u64 = 0x00001000; + + #[repr(C)] + struct clone_args { + flags: u64, + pidfd: u64, + child_tid: u64, + parent_tid: u64, + exit_signal: u64, + stack: u64, + stack_size: u64, + tls: u64, + set_tid: u64, + set_tid_size: u64, + cgroup: u64, + } + + syscall! { + fn clone3(cl_args: *mut clone_args, len: libc::size_t) -> libc::c_long + } + + // If we fail to create a pidfd for any reason, this will + // stay as -1, which indicates an error. + let mut pidfd: pid_t = -1; + + // Attempt to use the `clone3` syscall, which supports more arguments + // (in particular, the ability to create a pidfd). If this fails, + // we will fall through this block to a call to `fork()` + if HAS_CLONE3.load(Ordering::Relaxed) { + let mut flags = 0; + if self.get_create_pidfd() { + flags |= CLONE_PIDFD; + } + + let mut args = clone_args { + flags, + pidfd: &mut pidfd as *mut pid_t as u64, + child_tid: 0, + parent_tid: 0, + exit_signal: libc::SIGCHLD as u64, + stack: 0, + stack_size: 0, + tls: 0, + set_tid: 0, + set_tid_size: 0, + cgroup: 0, + }; + + let args_ptr = &mut args as *mut clone_args; + let args_size = crate::mem::size_of::(); + + let res = cvt(clone3(args_ptr, args_size)); + match res { + Ok(n) => return Ok((n as pid_t, pidfd)), + Err(e) => match e.raw_os_error() { + // Multiple threads can race to execute this store, + // but that's fine - that just means that multiple threads + // will have tried and failed to execute the same syscall, + // with no other side effects. + Some(libc::ENOSYS) => HAS_CLONE3.store(false, Ordering::Relaxed), + // Fallback to fork if `EPERM` is returned. (e.g. blocked by seccomp) + Some(libc::EPERM) => {} + _ => return Err(e), + }, + } + } + + // If we get here, the 'clone3' syscall does not exist + // or we do not have permission to call it + cvt(libc::fork()).map(|res| (res, pidfd)) + } + pub fn exec(&mut self, default: Stdio) -> io::Error { let envp = self.capture_env(); @@ -308,6 +401,7 @@ impl Command { || (self.env_saw_path() && !self.program_is_path()) || !self.get_closures().is_empty() || self.get_groups().is_some() + || self.get_create_pidfd() { return Ok(None); } @@ -352,7 +446,7 @@ impl Command { None => None, }; - let mut p = Process { pid: 0, status: None }; + let mut p = Process::new(0, -1); struct PosixSpawnFileActions<'a>(&'a mut MaybeUninit); @@ -441,9 +535,27 @@ impl Command { pub struct Process { pid: pid_t, status: Option, + // On Linux, stores the pidfd created for this child. + // This is None if the user did not request pidfd creation, + // or if the pidfd could not be created for some reason + // (e.g. the `clone3` syscall was not available). + #[cfg(target_os = "linux")] + pidfd: Option, } impl Process { + #[cfg(target_os = "linux")] + fn new(pid: pid_t, pidfd: pid_t) -> Self { + use crate::sys_common::FromInner; + let pidfd = (pidfd >= 0).then(|| PidFd::from_inner(sys::fd::FileDesc::new(pidfd))); + Process { pid, status: None, pidfd } + } + + #[cfg(not(target_os = "linux"))] + fn new(pid: pid_t, _pidfd: pid_t) -> Self { + Process { pid, status: None } + } + pub fn id(&self) -> u32 { self.pid as u32 } @@ -580,6 +692,24 @@ impl ExitStatusError { } } +#[cfg(target_os = "linux")] +#[unstable(feature = "linux_pidfd", issue = "82971")] +impl crate::os::linux::process::ChildExt for crate::process::Child { + fn pidfd(&self) -> io::Result<&PidFd> { + self.handle + .pidfd + .as_ref() + .ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created.")) + } + + fn take_pidfd(&mut self) -> io::Result { + self.handle + .pidfd + .take() + .ok_or_else(|| Error::new(ErrorKind::Other, "No pidfd was created.")) + } +} + #[cfg(test)] #[path = "process_unix/tests.rs"] mod tests; diff --git a/src/test/ui/command/command-create-pidfd.rs b/src/test/ui/command/command-create-pidfd.rs new file mode 100644 index 0000000000000..93321ac536ab9 --- /dev/null +++ b/src/test/ui/command/command-create-pidfd.rs @@ -0,0 +1,45 @@ +// run-pass +// only-linux - pidfds are a linux-specific concept + +#![feature(linux_pidfd)] +#![feature(rustc_private)] + +extern crate libc; + +use std::io::Error; +use std::os::linux::process::{ChildExt, CommandExt}; +use std::process::Command; + +fn has_clone3() -> bool { + let res = unsafe { libc::syscall(libc::SYS_clone3, 0, 0) }; + let err = (res == -1) + .then(|| Error::last_os_error()) + .expect("probe syscall should not succeed"); + err.raw_os_error() != Some(libc::ENOSYS) +} + +fn main() { + // pidfds require the clone3 syscall + if !has_clone3() { + return; + } + + // We don't assert the precise value, since the standard library + // might have opened other file descriptors before our code runs. + let _ = Command::new("echo") + .create_pidfd(true) + .spawn() + .unwrap() + .pidfd().expect("failed to obtain pidfd"); + + let _ = Command::new("echo") + .create_pidfd(false) + .spawn() + .unwrap() + .pidfd().expect_err("pidfd should not have been created when create_pid(false) is set"); + + let _ = Command::new("echo") + .spawn() + .unwrap() + .pidfd().expect_err("pidfd should not have been created"); +} diff --git a/src/test/ui/command/command-pre-exec.rs b/src/test/ui/command/command-pre-exec.rs index 61914e2293070..10a8b19159e02 100644 --- a/src/test/ui/command/command-pre-exec.rs +++ b/src/test/ui/command/command-pre-exec.rs @@ -8,8 +8,6 @@ // ignore-sgx no processes #![feature(process_exec, rustc_private)] -extern crate libc; - use std::env; use std::io::Error; use std::os::unix::process::CommandExt; @@ -17,6 +15,23 @@ use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; +#[cfg(not(target_os = "linux"))] +fn getpid() -> u32 { + use std::process; + process::id() +} + +/// We need to directly use the getpid syscall instead of using `process::id()` +/// because the libc wrapper might return incorrect values after a process was +/// forked. +#[cfg(target_os = "linux")] +fn getpid() -> u32 { + extern crate libc; + unsafe { + libc::syscall(libc::SYS_getpid) as _ + } +} + fn main() { if let Some(arg) = env::args().nth(1) { match &arg[..] { @@ -68,14 +83,12 @@ fn main() { }; assert_eq!(output.raw_os_error(), Some(102)); - let pid = unsafe { libc::getpid() }; - assert!(pid >= 0); + let pid = getpid(); let output = unsafe { Command::new(&me) .arg("empty") .pre_exec(move || { - let child = libc::getpid(); - assert!(child >= 0); + let child = getpid(); assert!(pid != child); Ok(()) }) diff --git a/src/test/ui/process/process-panic-after-fork.rs b/src/test/ui/process/process-panic-after-fork.rs index 1ccf6bb051c20..ad749371beac0 100644 --- a/src/test/ui/process/process-panic-after-fork.rs +++ b/src/test/ui/process/process-panic-after-fork.rs @@ -23,6 +23,21 @@ use std::sync::atomic::{AtomicU32, Ordering}; use libc::c_int; +#[cfg(not(target_os = "linux"))] +fn getpid() -> u32 { + process::id() +} + +/// We need to directly use the getpid syscall instead of using `process::id()` +/// because the libc wrapper might return incorrect values after a process was +/// forked. +#[cfg(target_os = "linux")] +fn getpid() -> u32 { + unsafe { + libc::syscall(libc::SYS_getpid) as _ + } +} + /// This stunt allocator allows us to spot heap allocations in the child. struct PidChecking { parent: A, @@ -44,7 +59,7 @@ impl PidChecking { fn check(&self) { let require_pid = self.require_pid.load(Ordering::Acquire); if require_pid != 0 { - let actual_pid = process::id(); + let actual_pid = getpid(); if require_pid != actual_pid { unsafe { libc::raise(libc::SIGUSR1);