Skip to content

Commit 8005ccf

Browse files
committed
WIP: optimize process spawning on Linux
By avoiding allocations and sorting when copying environment variables
1 parent 5ea6256 commit 8005ccf

File tree

3 files changed

+97
-8
lines changed

3 files changed

+97
-8
lines changed

library/std/src/sys/unix/process/process_common.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ pub struct Command {
9393
/// `args`, followed by a `null`. Be careful when modifying `program` or
9494
/// `args` to properly update this as well.
9595
argv: Argv,
96-
env: CommandEnv,
96+
pub env: CommandEnv,
9797

9898
program_kind: ProgramKind,
9999
cwd: Option<CString>,
100100
uid: Option<uid_t>,
101101
gid: Option<gid_t>,
102-
saw_nul: bool,
102+
pub saw_nul: bool,
103103
closures: Vec<Box<dyn FnMut() -> io::Result<()> + Send + Sync>>,
104104
groups: Option<Box<[gid_t]>>,
105105
stdin: Option<Stdio>,
@@ -402,7 +402,7 @@ fn os2c(s: &OsStr, saw_nul: &mut bool) -> CString {
402402

403403
// Helper type to manage ownership of the strings within a C-style array.
404404
pub struct CStringArray {
405-
items: Vec<CString>,
405+
pub items: Vec<CString>,
406406
ptrs: Vec<*const c_char>,
407407
}
408408

@@ -426,7 +426,10 @@ impl CStringArray {
426426
}
427427
}
428428

429-
fn construct_envp(env: BTreeMap<OsString, OsString>, saw_nul: &mut bool) -> CStringArray {
429+
pub(crate) fn construct_envp(
430+
env: BTreeMap<OsString, OsString>,
431+
saw_nul: &mut bool,
432+
) -> CStringArray {
430433
let mut result = CStringArray::with_capacity(env.len());
431434
for (mut k, v) in env {
432435
// Reserve additional space for '=' and null terminator

library/std/src/sys/unix/process/process_unix.rs

+88-2
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ use crate::io::{self, Error, ErrorKind};
33
use crate::mem;
44
use crate::num::NonZeroI32;
55
use crate::sys;
6-
use crate::sys::cvt;
76
use crate::sys::process::process_common::*;
7+
use crate::sys::{cvt, memchr};
88
use core::ffi::NonZero_c_int;
99

1010
#[cfg(target_os = "linux")]
1111
use crate::os::linux::process::PidFd;
1212
#[cfg(target_os = "linux")]
1313
use crate::os::unix::io::AsRawFd;
14-
14+
use crate::sys::os::{env_read_lock, environ};
1515
#[cfg(any(
1616
target_os = "macos",
1717
target_os = "watchos",
@@ -29,6 +29,8 @@ use libc::RTP_ID as pid_t;
2929
#[cfg(not(target_os = "vxworks"))]
3030
use libc::{c_int, pid_t};
3131

32+
use crate::collections::HashSet;
33+
use crate::ffi::{CStr, CString};
3234
#[cfg(not(any(
3335
target_os = "vxworks",
3436
target_os = "l4re",
@@ -68,6 +70,87 @@ cfg_if::cfg_if! {
6870
// Command
6971
////////////////////////////////////////////////////////////////////////////////
7072

73+
fn count_env_vars() -> usize {
74+
let mut count = 0;
75+
unsafe {
76+
let _guard = env_read_lock();
77+
let mut environ = *environ();
78+
while !(*environ).is_null() {
79+
environ = environ.add(1);
80+
count += 1;
81+
}
82+
}
83+
count
84+
}
85+
86+
/// Super-duper optimized version of capturing environment variables, that tries to avoid
87+
/// unnecessary allocations and sorting.
88+
fn capture_envp(cmd: &mut Command) -> CStringArray {
89+
use crate::os::unix::ffi::OsStrExt;
90+
91+
// Count the upper bound of environment variables (vars from the environ + vars coming from the
92+
// command).
93+
let env_count_upper_bound = count_env_vars() + cmd.env.vars.len();
94+
95+
let mut env_array = CStringArray::with_capacity(env_count_upper_bound);
96+
97+
// Remember which vars were already set by the user.
98+
// If the user value is Some, we will add the variable to `env_array` and modify `visited`.
99+
// If the user value is None, we will only modify `visited`.
100+
// In either case, a variable with the same name from `environ` will not be added to `env_array`.
101+
let mut visited: HashSet<&[u8]> = HashSet::with_capacity(cmd.env.vars.len());
102+
103+
// First, add user defined variables to `env_array`, and mark the visited ones.
104+
for (key, maybe_value) in cmd.env.vars.iter() {
105+
if let Some(value) = maybe_value {
106+
// One extra byte for '=', and one extra byte for the NULL terminator.
107+
let mut env_var: Vec<u8> =
108+
Vec::with_capacity(key.as_bytes().len() + value.as_bytes().len() + 2);
109+
env_var.extend_from_slice(key.as_bytes());
110+
env_var.push(b'=');
111+
env_var.extend_from_slice(value.as_bytes());
112+
113+
if let Ok(item) = CString::new(env_var) {
114+
env_array.push(item);
115+
} else {
116+
cmd.saw_nul = true;
117+
return env_array;
118+
}
119+
}
120+
visited.insert(key.as_bytes());
121+
}
122+
123+
// Then, if we're not clearing the original environment, go through it, and add each variable
124+
// to env_array if we haven't seen it yet.
125+
if !cmd.env.clear {
126+
unsafe {
127+
let _guard = env_read_lock();
128+
let mut environ = *environ();
129+
if !environ.is_null() {
130+
while !(*environ).is_null() {
131+
let c_str = CStr::from_ptr(*environ);
132+
let key_value = c_str.to_bytes();
133+
if !key_value.is_empty() {
134+
if let Some(pos) = memchr::memchr(b'=', &key_value[1..]).map(|p| p + 1) {
135+
let key = &key_value[..pos];
136+
if !visited.contains(&key) {
137+
env_array.push(CString::from(c_str));
138+
}
139+
}
140+
}
141+
environ = environ.add(1);
142+
}
143+
}
144+
}
145+
}
146+
147+
env_array
148+
}
149+
150+
pub fn capture_env_linux(cmd: &mut Command) -> Option<CStringArray> {
151+
if cmd.env.is_unchanged() { None } else { Some(capture_envp(cmd)) }
152+
}
153+
71154
impl Command {
72155
pub fn spawn(
73156
&mut self,
@@ -76,6 +159,9 @@ impl Command {
76159
) -> io::Result<(Process, StdioPipes)> {
77160
const CLOEXEC_MSG_FOOTER: [u8; 4] = *b"NOEX";
78161

162+
#[cfg(target_os = "linux")]
163+
let envp = capture_env_linux(self);
164+
#[cfg(not(target_os = "linux"))]
79165
let envp = self.capture_env();
80166

81167
if self.saw_nul() {

library/std/src/sys_common/process.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ use crate::sys::process::{EnvKey, ExitStatus, Process, StdioPipes};
1212
// Stores a set of changes to an environment
1313
#[derive(Clone)]
1414
pub struct CommandEnv {
15-
clear: bool,
15+
pub clear: bool,
1616
saw_path: bool,
17-
vars: BTreeMap<EnvKey, Option<OsString>>,
17+
pub vars: BTreeMap<EnvKey, Option<OsString>>,
1818
}
1919

2020
impl Default for CommandEnv {

0 commit comments

Comments
 (0)