Skip to content

std::os - Add join_paths, make setenv non-utf8 capable #15280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 147 additions & 60 deletions src/libstd/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ use path::{Path, GenericPath, BytesContainer};
use ptr::RawPtr;
use ptr;
use result::{Err, Ok, Result};
use slice::{Vector, ImmutableVector, MutableVector};
use slice::{Vector, ImmutableVector, MutableVector, ImmutableEqVector};
use str::{Str, StrSlice, StrAllocating};
use str;
use string::String;
Expand Down Expand Up @@ -396,9 +396,9 @@ pub fn getenv_as_bytes(n: &str) -> Option<Vec<u8>> {
/// None => println!("{} is not defined in the environment.", key)
/// }
/// ```
pub fn setenv(n: &str, v: &str) {
pub fn setenv<T: BytesContainer>(n: &str, v: T) {
#[cfg(unix)]
fn _setenv(n: &str, v: &str) {
fn _setenv(n: &str, v: &[u8]) {
unsafe {
with_env_lock(|| {
n.with_c_str(|nbuf| {
Expand All @@ -411,18 +411,20 @@ pub fn setenv(n: &str, v: &str) {
}

#[cfg(windows)]
fn _setenv(n: &str, v: &str) {
fn _setenv(n: &str, v: &[u8]) {
let n: Vec<u16> = n.utf16_units().collect();
let n = n.append_one(0);
let v: Vec<u16> = v.utf16_units().collect();
let v: Vec<u16> = str::from_utf8(v).unwrap().utf16_units().collect();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

str::from_utf8(v).unwrap() introduces failure if the input is not valid utf-8.

Perhaps setenv() needs a bool return value, with documentation stating that it may return false on Windows (and only Windows).

let v = v.append_one(0);

unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
})
}
}
_setenv(n, v)

_setenv(n, v.container_as_bytes())
}

/// Remove a variable from the environment entirely.
Expand Down Expand Up @@ -451,17 +453,15 @@ pub fn unsetenv(n: &str) {
_unsetenv(n);
}

#[cfg(unix)]
/// Parse a string or vector according to the platform's conventions
/// for the `PATH` environment variable and return a Vec<Path>.
/// Drops empty paths.
/// Parses input according to platform conventions for the `PATH`
/// environment variable.
///
/// # Example
/// ```rust
/// use std::os;
///
/// let key = "PATH";
/// match os::getenv(key) {
/// match os::getenv_as_bytes(key) {
/// Some(paths) => {
/// for path in os::split_paths(paths).iter() {
/// println!("'{}'", path.display());
Expand All @@ -471,57 +471,112 @@ pub fn unsetenv(n: &str) {
/// }
/// ```
pub fn split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
unparsed.container_as_bytes()
.split(|b| *b == ':' as u8)
.filter(|s| s.len() > 0)
.map(Path::new)
.collect()
}
#[cfg(unix)]
fn _split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
unparsed.container_as_bytes()
.split(|b| *b == b':')
.map(Path::new)
.collect()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately it turns out that it's a bit more complicated than expected.

After investigating how bash behaves, it's a bit more ad-hoc and less elegant than this. Notably:

  • An empty PATH yields zero paths, whereas this code yields a single "." path
  • PATH=":" yields a single "." path
  • Multiple colons in a row behaves rather unexpectedly:
    • When the PATH contains nothing but colons, the number of empty paths yielded is floor(ncolon/2)+1. That is, 1 colon is 1 path, 2-3 colons are 2 paths, 4-5 colons are 3 paths, etc.
    • When the PATH contains multiple colons in a row with at least one non-empty path, it's a little more sensible. The number of empty paths is just ceil(ncolon/2).

Is it worth trying to match bash? I think it's worth doing at least something here, because upon reflection it seems wrong for split_paths("") to return a path (and similarly, split_paths(":") should probably then return a single empty path, although that's more open to debate).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The simplest thing to do here would really to just special-case unparsed.container_as_bytes().len() == 0 to return vec![] (which would presumably happen in the outer split_paths() so it applies to Windows too). But I'd welcome input from anyone else as to whether it's worth trying to behave closer to bash, or whether this one change is sufficient.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would personally view us as pretty distinct from bash, and along those lines pretty separate from it. These are pretty obscure edge cases, so I'd almost just rather err on the side of simplicity rather than completeness at this point in time (this is a good patch to have regardless).

We can always end up tweaking the behavior later on with minimal impact on clients.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are different from bash, yes, but bash is a good standard for how PATH is to be interpreted. The only more authoritative source would be execvp(), although I chose bash largely because I ran across a reference to the code bash uses to implement PATH-splitting.

Glancing at the execvp() impl for some version of libc on OS X, it behaves much more like our current implementation, where every empty component is treated as ., and an empty PATH has a single empty component.

I suppose execvp() provides sufficient justification for leaving things as they are.

}

#[cfg(windows)]
/// Parse a string or vector according to the platform's conventions
/// for the `PATH` environment variable. Drops empty paths.
pub fn split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
// On Windows, the PATH environment variable is semicolon separated. Double
// quotes are used as a way of introducing literal semicolons (since
// c:\some;dir is a valid Windows path). Double quotes are not themselves
// permitted in path names, so there is no way to escape a double quote.
// Quoted regions can appear in arbitrary locations, so
//
// c:\foo;c:\som"e;di"r;c:\bar
//
// Should parse as [c:\foo, c:\some;dir, c:\bar].
//
// (The above is based on testing; there is no clear reference available
// for the grammar.)

let mut parsed = Vec::new();
let mut in_progress = Vec::new();
let mut in_quote = false;

for b in unparsed.container_as_bytes().iter() {
match *b as char {
';' if !in_quote => {
// ignore zero-length path strings
if in_progress.len() > 0 {
#[cfg(windows)]
pub fn _split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> {
// On Windows, the PATH environment variable is semicolon separated. Double
// quotes are used as a way of introducing literal semicolons (since
// c:\some;dir is a valid Windows path). Double quotes are not themselves
// permitted in path names, so there is no way to escape a double quote.
// Quoted regions can appear in arbitrary locations, so
//
// c:\foo;c:\som"e;di"r;c:\bar
//
// Should parse as [c:\foo, c:\some;dir, c:\bar].
//
// (The above is based on testing; there is no clear reference available
// for the grammar.)

let mut parsed = Vec::new();
let mut in_progress = Vec::new();
let mut in_quote = false;

for b in unparsed.container_as_bytes().iter() {
match *b {
b';' if !in_quote => {
parsed.push(Path::new(in_progress.as_slice()));
in_progress.truncate(0)
}
b'"' => {
in_quote = !in_quote;
}
_ => {
in_progress.push(*b);
}
in_progress.truncate(0)
}
'\"' => {
in_quote = !in_quote;
}
_ => {
in_progress.push(*b);
}
parsed.push(Path::new(in_progress));
parsed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the above notes on how bash splits paths, I am curious how Windows handles the same circumstances (empty paths, paths containing just semicolons, etc).

}

_split_paths(unparsed)
}

/// Joins a collection of `Path`s appropriately for the `PATH`
/// environment variable.
///
/// Returns a `Vec<u8>` on success, since `Path`s are not utf-8
/// encoded on all platforms.
///
/// Returns an `Err` (containing an error message) if one of the input
/// `Path`s contains an invalid character for constructing the `PATH`
/// variable (a double quote on Windows or a colon on Unix).
///
/// # Example
///
/// ```rust
/// use std::os;
/// use std::path::Path;
///
/// let key = "PATH";
/// let mut paths = os::getenv_as_bytes(key).map_or(Vec::new(), os::split_paths);
/// paths.push(Path::new("/home/xyz/bin"));
/// os::setenv(key, os::join_paths(paths.as_slice()).unwrap());
/// ```
pub fn join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
#[cfg(windows)]
fn _join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
let mut joined = Vec::new();
let sep = b';';

for (i, path) in paths.iter().map(|p| p.container_as_bytes()).enumerate() {
if i > 0 { joined.push(sep) }
if path.contains(&b'"') {
return Err("path segment contains `\"`");
} else if path.contains(&sep) {
joined.push(b'"');
joined.push_all(path);
joined.push(b'"');
} else {
joined.push_all(path);
}
}

Ok(joined)
}

if in_progress.len() > 0 {
parsed.push(Path::new(in_progress));
#[cfg(unix)]
fn _join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> {
let mut joined = Vec::new();
let sep = b':';

for (i, path) in paths.iter().map(|p| p.container_as_bytes()).enumerate() {
if i > 0 { joined.push(sep) }
if path.contains(&sep) { return Err("path segment contains separator `:`") }
joined.push_all(path);
}

Ok(joined)
}

parsed
_join_paths(paths)
}

/// A low-level OS in-memory pipe.
Expand Down Expand Up @@ -1765,7 +1820,7 @@ mod tests {
use c_str::ToCStr;
use option;
use os::{env, getcwd, getenv, make_absolute};
use os::{split_paths, setenv, unsetenv};
use os::{split_paths, join_paths, setenv, unsetenv};
use os;
use rand::Rng;
use rand;
Expand Down Expand Up @@ -2030,11 +2085,11 @@ mod tests {
parsed.iter().map(|s| Path::new(*s)).collect()
}

assert!(check_parse("", []));
assert!(check_parse(r#""""#, []));
assert!(check_parse(";;", []));
assert!(check_parse("", [""]));
assert!(check_parse(r#""""#, [""]));
assert!(check_parse(";;", ["", "", ""]));
assert!(check_parse(r"c:\", [r"c:\"]));
assert!(check_parse(r"c:\;", [r"c:\"]));
assert!(check_parse(r"c:\;", [r"c:\", ""]));
assert!(check_parse(r"c:\;c:\Program Files\",
[r"c:\", r"c:\Program Files\"]));
assert!(check_parse(r#"c:\;c:\"foo"\"#, [r"c:\", r"c:\foo\"]));
Expand All @@ -2050,12 +2105,44 @@ mod tests {
parsed.iter().map(|s| Path::new(*s)).collect()
}

assert!(check_parse("", []));
assert!(check_parse("::", []));
assert!(check_parse("", [""]));
assert!(check_parse("::", ["", "", ""]));
assert!(check_parse("/", ["/"]));
assert!(check_parse("/:", ["/"]));
assert!(check_parse("/:", ["/", ""]));
assert!(check_parse("/:/usr/local", ["/", "/usr/local"]));
}

#[test]
#[cfg(unix)]
fn join_paths_unix() {
fn test_eq(input: &[&str], output: &str) -> bool {
join_paths(input).unwrap().as_slice() == output.as_bytes()
}

assert!(test_eq([], ""));
assert!(test_eq(["/bin", "/usr/bin", "/usr/local/bin"],
"/bin:/usr/bin:/usr/local/bin"));
assert!(test_eq(["", "/bin", "", "", "/usr/bin", ""],
":/bin:::/usr/bin:"));
assert!(join_paths(["/te:st"]).is_err());
}

#[test]
#[cfg(windows)]
fn join_paths_windows() {
fn test_eq(input: &[&str], output: &str) -> bool {
join_paths(input).unwrap().as_slice() == output.as_bytes()
}

assert!(test_eq([], ""));
assert!(test_eq([r"c:\windows", r"c:\"],
r"c:\windows;c:\"));
assert!(test_eq(["", r"c:\windows", "", "", r"c:\", ""],
r";c:\windows;;;c:\;"));
assert!(test_eq([r"c:\te;st", r"c:\"],
r#""c:\te;st";c:\"#));
assert!(join_paths([r#"c:\te"st"#]).is_err());
}

// More recursive_mkdir tests are in extra::tempfile
}