-
Notifications
You must be signed in to change notification settings - Fork 13.3k
std::os - Add join_paths, make setenv non-utf8 capable #15280
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,7 +43,7 @@ use path::{Path, GenericPath, BytesContainer}; | |
use ptr::RawPtr; | ||
use ptr; | ||
use result::{Err, Ok, Result}; | ||
use slice::{Vector, ImmutableVector, MutableVector}; | ||
use slice::{Vector, ImmutableVector, MutableVector, ImmutableEqVector}; | ||
use str::{Str, StrSlice, StrAllocating}; | ||
use str; | ||
use string::String; | ||
|
@@ -396,9 +396,9 @@ pub fn getenv_as_bytes(n: &str) -> Option<Vec<u8>> { | |
/// None => println!("{} is not defined in the environment.", key) | ||
/// } | ||
/// ``` | ||
pub fn setenv(n: &str, v: &str) { | ||
pub fn setenv<T: BytesContainer>(n: &str, v: T) { | ||
#[cfg(unix)] | ||
fn _setenv(n: &str, v: &str) { | ||
fn _setenv(n: &str, v: &[u8]) { | ||
unsafe { | ||
with_env_lock(|| { | ||
n.with_c_str(|nbuf| { | ||
|
@@ -411,18 +411,20 @@ pub fn setenv(n: &str, v: &str) { | |
} | ||
|
||
#[cfg(windows)] | ||
fn _setenv(n: &str, v: &str) { | ||
fn _setenv(n: &str, v: &[u8]) { | ||
let n: Vec<u16> = n.utf16_units().collect(); | ||
let n = n.append_one(0); | ||
let v: Vec<u16> = v.utf16_units().collect(); | ||
let v: Vec<u16> = str::from_utf8(v).unwrap().utf16_units().collect(); | ||
let v = v.append_one(0); | ||
|
||
unsafe { | ||
with_env_lock(|| { | ||
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr()); | ||
}) | ||
} | ||
} | ||
_setenv(n, v) | ||
|
||
_setenv(n, v.container_as_bytes()) | ||
} | ||
|
||
/// Remove a variable from the environment entirely. | ||
|
@@ -451,17 +453,15 @@ pub fn unsetenv(n: &str) { | |
_unsetenv(n); | ||
} | ||
|
||
#[cfg(unix)] | ||
/// Parse a string or vector according to the platform's conventions | ||
/// for the `PATH` environment variable and return a Vec<Path>. | ||
/// Drops empty paths. | ||
/// Parses input according to platform conventions for the `PATH` | ||
/// environment variable. | ||
/// | ||
/// # Example | ||
/// ```rust | ||
/// use std::os; | ||
/// | ||
/// let key = "PATH"; | ||
/// match os::getenv(key) { | ||
/// match os::getenv_as_bytes(key) { | ||
/// Some(paths) => { | ||
/// for path in os::split_paths(paths).iter() { | ||
/// println!("'{}'", path.display()); | ||
|
@@ -471,57 +471,112 @@ pub fn unsetenv(n: &str) { | |
/// } | ||
/// ``` | ||
pub fn split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> { | ||
unparsed.container_as_bytes() | ||
.split(|b| *b == ':' as u8) | ||
.filter(|s| s.len() > 0) | ||
.map(Path::new) | ||
.collect() | ||
} | ||
#[cfg(unix)] | ||
fn _split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> { | ||
unparsed.container_as_bytes() | ||
.split(|b| *b == b':') | ||
.map(Path::new) | ||
.collect() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately it turns out that it's a bit more complicated than expected. After investigating how bash behaves, it's a bit more ad-hoc and less elegant than this. Notably:
Is it worth trying to match bash? I think it's worth doing at least something here, because upon reflection it seems wrong for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The simplest thing to do here would really to just special-case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would personally view us as pretty distinct from bash, and along those lines pretty separate from it. These are pretty obscure edge cases, so I'd almost just rather err on the side of simplicity rather than completeness at this point in time (this is a good patch to have regardless). We can always end up tweaking the behavior later on with minimal impact on clients. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are different from bash, yes, but bash is a good standard for how Glancing at the I suppose |
||
} | ||
|
||
#[cfg(windows)] | ||
/// Parse a string or vector according to the platform's conventions | ||
/// for the `PATH` environment variable. Drops empty paths. | ||
pub fn split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> { | ||
// On Windows, the PATH environment variable is semicolon separated. Double | ||
// quotes are used as a way of introducing literal semicolons (since | ||
// c:\some;dir is a valid Windows path). Double quotes are not themselves | ||
// permitted in path names, so there is no way to escape a double quote. | ||
// Quoted regions can appear in arbitrary locations, so | ||
// | ||
// c:\foo;c:\som"e;di"r;c:\bar | ||
// | ||
// Should parse as [c:\foo, c:\some;dir, c:\bar]. | ||
// | ||
// (The above is based on testing; there is no clear reference available | ||
// for the grammar.) | ||
|
||
let mut parsed = Vec::new(); | ||
let mut in_progress = Vec::new(); | ||
let mut in_quote = false; | ||
|
||
for b in unparsed.container_as_bytes().iter() { | ||
match *b as char { | ||
';' if !in_quote => { | ||
// ignore zero-length path strings | ||
if in_progress.len() > 0 { | ||
#[cfg(windows)] | ||
pub fn _split_paths<T: BytesContainer>(unparsed: T) -> Vec<Path> { | ||
// On Windows, the PATH environment variable is semicolon separated. Double | ||
// quotes are used as a way of introducing literal semicolons (since | ||
// c:\some;dir is a valid Windows path). Double quotes are not themselves | ||
// permitted in path names, so there is no way to escape a double quote. | ||
// Quoted regions can appear in arbitrary locations, so | ||
// | ||
// c:\foo;c:\som"e;di"r;c:\bar | ||
// | ||
// Should parse as [c:\foo, c:\some;dir, c:\bar]. | ||
// | ||
// (The above is based on testing; there is no clear reference available | ||
// for the grammar.) | ||
|
||
let mut parsed = Vec::new(); | ||
let mut in_progress = Vec::new(); | ||
let mut in_quote = false; | ||
|
||
for b in unparsed.container_as_bytes().iter() { | ||
match *b { | ||
b';' if !in_quote => { | ||
parsed.push(Path::new(in_progress.as_slice())); | ||
in_progress.truncate(0) | ||
} | ||
b'"' => { | ||
in_quote = !in_quote; | ||
} | ||
_ => { | ||
in_progress.push(*b); | ||
} | ||
in_progress.truncate(0) | ||
} | ||
'\"' => { | ||
in_quote = !in_quote; | ||
} | ||
_ => { | ||
in_progress.push(*b); | ||
} | ||
parsed.push(Path::new(in_progress)); | ||
parsed | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Regarding the above notes on how bash splits paths, I am curious how Windows handles the same circumstances (empty paths, paths containing just semicolons, etc). |
||
} | ||
|
||
_split_paths(unparsed) | ||
} | ||
|
||
/// Joins a collection of `Path`s appropriately for the `PATH` | ||
/// environment variable. | ||
/// | ||
/// Returns a `Vec<u8>` on success, since `Path`s are not utf-8 | ||
/// encoded on all platforms. | ||
/// | ||
/// Returns an `Err` (containing an error message) if one of the input | ||
/// `Path`s contains an invalid character for constructing the `PATH` | ||
/// variable (a double quote on Windows or a colon on Unix). | ||
/// | ||
/// # Example | ||
/// | ||
/// ```rust | ||
/// use std::os; | ||
/// use std::path::Path; | ||
/// | ||
/// let key = "PATH"; | ||
/// let mut paths = os::getenv_as_bytes(key).map_or(Vec::new(), os::split_paths); | ||
/// paths.push(Path::new("/home/xyz/bin")); | ||
/// os::setenv(key, os::join_paths(paths.as_slice()).unwrap()); | ||
/// ``` | ||
pub fn join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> { | ||
#[cfg(windows)] | ||
fn _join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> { | ||
let mut joined = Vec::new(); | ||
let sep = b';'; | ||
|
||
for (i, path) in paths.iter().map(|p| p.container_as_bytes()).enumerate() { | ||
if i > 0 { joined.push(sep) } | ||
if path.contains(&b'"') { | ||
return Err("path segment contains `\"`"); | ||
} else if path.contains(&sep) { | ||
joined.push(b'"'); | ||
joined.push_all(path); | ||
joined.push(b'"'); | ||
} else { | ||
joined.push_all(path); | ||
} | ||
} | ||
|
||
Ok(joined) | ||
} | ||
|
||
if in_progress.len() > 0 { | ||
parsed.push(Path::new(in_progress)); | ||
#[cfg(unix)] | ||
fn _join_paths<T: BytesContainer>(paths: &[T]) -> Result<Vec<u8>, &'static str> { | ||
let mut joined = Vec::new(); | ||
let sep = b':'; | ||
|
||
for (i, path) in paths.iter().map(|p| p.container_as_bytes()).enumerate() { | ||
if i > 0 { joined.push(sep) } | ||
if path.contains(&sep) { return Err("path segment contains separator `:`") } | ||
joined.push_all(path); | ||
} | ||
|
||
Ok(joined) | ||
} | ||
|
||
parsed | ||
_join_paths(paths) | ||
} | ||
|
||
/// A low-level OS in-memory pipe. | ||
|
@@ -1765,7 +1820,7 @@ mod tests { | |
use c_str::ToCStr; | ||
use option; | ||
use os::{env, getcwd, getenv, make_absolute}; | ||
use os::{split_paths, setenv, unsetenv}; | ||
use os::{split_paths, join_paths, setenv, unsetenv}; | ||
use os; | ||
use rand::Rng; | ||
use rand; | ||
|
@@ -2030,11 +2085,11 @@ mod tests { | |
parsed.iter().map(|s| Path::new(*s)).collect() | ||
} | ||
|
||
assert!(check_parse("", [])); | ||
assert!(check_parse(r#""""#, [])); | ||
assert!(check_parse(";;", [])); | ||
assert!(check_parse("", [""])); | ||
assert!(check_parse(r#""""#, [""])); | ||
assert!(check_parse(";;", ["", "", ""])); | ||
assert!(check_parse(r"c:\", [r"c:\"])); | ||
assert!(check_parse(r"c:\;", [r"c:\"])); | ||
assert!(check_parse(r"c:\;", [r"c:\", ""])); | ||
assert!(check_parse(r"c:\;c:\Program Files\", | ||
[r"c:\", r"c:\Program Files\"])); | ||
assert!(check_parse(r#"c:\;c:\"foo"\"#, [r"c:\", r"c:\foo\"])); | ||
|
@@ -2050,12 +2105,44 @@ mod tests { | |
parsed.iter().map(|s| Path::new(*s)).collect() | ||
} | ||
|
||
assert!(check_parse("", [])); | ||
assert!(check_parse("::", [])); | ||
assert!(check_parse("", [""])); | ||
assert!(check_parse("::", ["", "", ""])); | ||
assert!(check_parse("/", ["/"])); | ||
assert!(check_parse("/:", ["/"])); | ||
assert!(check_parse("/:", ["/", ""])); | ||
assert!(check_parse("/:/usr/local", ["/", "/usr/local"])); | ||
} | ||
|
||
#[test] | ||
#[cfg(unix)] | ||
fn join_paths_unix() { | ||
fn test_eq(input: &[&str], output: &str) -> bool { | ||
join_paths(input).unwrap().as_slice() == output.as_bytes() | ||
} | ||
|
||
assert!(test_eq([], "")); | ||
assert!(test_eq(["/bin", "/usr/bin", "/usr/local/bin"], | ||
"/bin:/usr/bin:/usr/local/bin")); | ||
assert!(test_eq(["", "/bin", "", "", "/usr/bin", ""], | ||
":/bin:::/usr/bin:")); | ||
assert!(join_paths(["/te:st"]).is_err()); | ||
} | ||
|
||
#[test] | ||
#[cfg(windows)] | ||
fn join_paths_windows() { | ||
fn test_eq(input: &[&str], output: &str) -> bool { | ||
join_paths(input).unwrap().as_slice() == output.as_bytes() | ||
} | ||
|
||
assert!(test_eq([], "")); | ||
assert!(test_eq([r"c:\windows", r"c:\"], | ||
r"c:\windows;c:\")); | ||
assert!(test_eq(["", r"c:\windows", "", "", r"c:\", ""], | ||
r";c:\windows;;;c:\;")); | ||
assert!(test_eq([r"c:\te;st", r"c:\"], | ||
r#""c:\te;st";c:\"#)); | ||
assert!(join_paths([r#"c:\te"st"#]).is_err()); | ||
} | ||
|
||
// More recursive_mkdir tests are in extra::tempfile | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
str::from_utf8(v).unwrap()
introduces failure if the input is not valid utf-8.Perhaps
setenv()
needs abool
return value, with documentation stating that it may returnfalse
on Windows (and only Windows).