Skip to content

Commit fba32ea

Browse files
committed
auto merge of #12283 : kballard/rust/env-args-bytes, r=erickt
Change `os::args()` and `os::env()` to use `str::from_utf8_lossy()`. Add new functions `os::args_as_bytes()` and `os::env_as_bytes()` to retrieve the args/env as byte vectors instead. The existing methods were left returning strings because I expect that the common use-case is to want string handling. Fixes #7188.
2 parents c9f13b4 + d22b164 commit fba32ea

File tree

4 files changed

+148
-42
lines changed

4 files changed

+148
-42
lines changed

src/libstd/c_str.rs

+37-4
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ use str;
7979
use vec::{ImmutableVector, MutableVector};
8080
use vec;
8181
use rt::global_heap::malloc_raw;
82+
use unstable::raw::Slice;
8283

8384
/// The representation of a C String.
8485
///
@@ -169,6 +170,7 @@ impl CString {
169170
}
170171

171172
/// Converts the CString into a `&[u8]` without copying.
173+
/// Includes the terminating NUL byte.
172174
///
173175
/// # Failure
174176
///
@@ -177,7 +179,21 @@ impl CString {
177179
pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
178180
if self.buf.is_null() { fail!("CString is null!"); }
179181
unsafe {
180-
cast::transmute((self.buf, self.len() + 1))
182+
cast::transmute(Slice { data: self.buf, len: self.len() + 1 })
183+
}
184+
}
185+
186+
/// Converts the CString into a `&[u8]` without copying.
187+
/// Does not include the terminating NUL byte.
188+
///
189+
/// # Failure
190+
///
191+
/// Fails if the CString is null.
192+
#[inline]
193+
pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
194+
if self.buf.is_null() { fail!("CString is null!"); }
195+
unsafe {
196+
cast::transmute(Slice { data: self.buf, len: self.len() })
181197
}
182198
}
183199

@@ -189,8 +205,7 @@ impl CString {
189205
/// Fails if the CString is null.
190206
#[inline]
191207
pub fn as_str<'a>(&'a self) -> Option<&'a str> {
192-
let buf = self.as_bytes();
193-
let buf = buf.slice_to(buf.len()-1); // chop off the trailing NUL
208+
let buf = self.as_bytes_no_nul();
194209
str::from_utf8(buf)
195210
}
196211

@@ -417,7 +432,7 @@ mod tests {
417432
let expected = ["zero", "one"];
418433
let mut it = expected.iter();
419434
let result = from_c_multistring(ptr as *libc::c_char, None, |c| {
420-
let cbytes = c.as_bytes().slice_to(c.len());
435+
let cbytes = c.as_bytes_no_nul();
421436
assert_eq!(cbytes, it.next().unwrap().as_bytes());
422437
});
423438
assert_eq!(result, 2);
@@ -552,13 +567,31 @@ mod tests {
552567
assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0));
553568
}
554569

570+
#[test]
571+
fn test_as_bytes_no_nul() {
572+
let c_str = "hello".to_c_str();
573+
assert_eq!(c_str.as_bytes_no_nul(), bytes!("hello"));
574+
let c_str = "".to_c_str();
575+
let exp: &[u8] = [];
576+
assert_eq!(c_str.as_bytes_no_nul(), exp);
577+
let c_str = bytes!("foo", 0xff).to_c_str();
578+
assert_eq!(c_str.as_bytes_no_nul(), bytes!("foo", 0xff));
579+
}
580+
555581
#[test]
556582
#[should_fail]
557583
fn test_as_bytes_fail() {
558584
let c_str = unsafe { CString::new(ptr::null(), false) };
559585
c_str.as_bytes();
560586
}
561587

588+
#[test]
589+
#[should_fail]
590+
fn test_as_bytes_no_nul_fail() {
591+
let c_str = unsafe { CString::new(ptr::null(), false) };
592+
c_str.as_bytes_no_nul();
593+
}
594+
562595
#[test]
563596
fn test_as_str() {
564597
let c_str = "hello".to_c_str();

src/libstd/os.rs

+87-15
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@ use ptr::RawPtr;
5353

5454
#[cfg(unix)]
5555
use c_str::ToCStr;
56+
#[cfg(windows)]
57+
use str::OwnedStr;
5658

5759
/// Delegates to the libc close() function, returning the same return value.
5860
pub fn close(fd: int) -> int {
@@ -158,10 +160,23 @@ fn with_env_lock<T>(f: || -> T) -> T {
158160

159161
/// Returns a vector of (variable, value) pairs for all the environment
160162
/// variables of the current process.
163+
///
164+
/// Invalid UTF-8 bytes are replaced with \uFFFD. See `str::from_utf8_lossy()`
165+
/// for details.
161166
pub fn env() -> ~[(~str,~str)] {
167+
env_as_bytes().move_iter().map(|(k,v)| {
168+
let k = str::from_utf8_lossy(k).into_owned();
169+
let v = str::from_utf8_lossy(v).into_owned();
170+
(k,v)
171+
}).collect()
172+
}
173+
174+
/// Returns a vector of (variable, value) byte-vector pairs for all the
175+
/// environment variables of the current process.
176+
pub fn env_as_bytes() -> ~[(~[u8],~[u8])] {
162177
unsafe {
163178
#[cfg(windows)]
164-
unsafe fn get_env_pairs() -> ~[~str] {
179+
unsafe fn get_env_pairs() -> ~[~[u8]] {
165180
use c_str;
166181
use str::StrSlice;
167182

@@ -176,13 +191,15 @@ pub fn env() -> ~[(~str,~str)] {
176191
}
177192
let mut result = ~[];
178193
c_str::from_c_multistring(ch as *c_char, None, |cstr| {
179-
result.push(cstr.as_str().unwrap().to_owned());
194+
result.push(cstr.as_bytes_no_nul().to_owned());
180195
});
181196
FreeEnvironmentStringsA(ch);
182197
result
183198
}
184199
#[cfg(unix)]
185-
unsafe fn get_env_pairs() -> ~[~str] {
200+
unsafe fn get_env_pairs() -> ~[~[u8]] {
201+
use c_str::CString;
202+
186203
extern {
187204
fn rust_env_pairs() -> **c_char;
188205
}
@@ -193,20 +210,19 @@ pub fn env() -> ~[(~str,~str)] {
193210
}
194211
let mut result = ~[];
195212
ptr::array_each(environ, |e| {
196-
let env_pair = str::raw::from_c_str(e);
197-
debug!("get_env_pairs: {}", env_pair);
213+
let env_pair = CString::new(e, false).as_bytes_no_nul().to_owned();
198214
result.push(env_pair);
199215
});
200216
result
201217
}
202218

203-
fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] {
219+
fn env_convert(input: ~[~[u8]]) -> ~[(~[u8], ~[u8])] {
204220
let mut pairs = ~[];
205221
for p in input.iter() {
206-
let vs: ~[&str] = p.splitn('=', 1).collect();
207-
debug!("splitting: len: {}", vs.len());
208-
assert_eq!(vs.len(), 2);
209-
pairs.push((vs[0].to_owned(), vs[1].to_owned()));
222+
let vs: ~[&[u8]] = p.splitn(1, |b| *b == '=' as u8).collect();
223+
let key = vs[0].to_owned();
224+
let val = (if vs.len() < 2 { ~[] } else { vs[1].to_owned() });
225+
pairs.push((key, val));
210226
}
211227
pairs
212228
}
@@ -220,14 +236,34 @@ pub fn env() -> ~[(~str,~str)] {
220236
#[cfg(unix)]
221237
/// Fetches the environment variable `n` from the current process, returning
222238
/// None if the variable isn't set.
239+
///
240+
/// Any invalid UTF-8 bytes in the value are replaced by \uFFFD. See
241+
/// `str::from_utf8_lossy()` for details.
242+
///
243+
/// # Failure
244+
///
245+
/// Fails if `n` has any interior NULs.
223246
pub fn getenv(n: &str) -> Option<~str> {
247+
getenv_as_bytes(n).map(|v| str::from_utf8_lossy(v).into_owned())
248+
}
249+
250+
#[cfg(unix)]
251+
/// Fetches the environment variable `n` byte vector from the current process,
252+
/// returning None if the variable isn't set.
253+
///
254+
/// # Failure
255+
///
256+
/// Fails if `n` has any interior NULs.
257+
pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
258+
use c_str::CString;
259+
224260
unsafe {
225261
with_env_lock(|| {
226262
let s = n.with_c_str(|buf| libc::getenv(buf));
227263
if s.is_null() {
228264
None
229265
} else {
230-
Some(str::raw::from_c_str(s))
266+
Some(CString::new(s, false).as_bytes_no_nul().to_owned())
231267
}
232268
})
233269
}
@@ -249,10 +285,21 @@ pub fn getenv(n: &str) -> Option<~str> {
249285
}
250286
}
251287

288+
#[cfg(windows)]
289+
/// Fetches the environment variable `n` byte vector from the current process,
290+
/// returning None if the variable isn't set.
291+
pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
292+
getenv(n).map(|s| s.into_bytes())
293+
}
294+
252295

253296
#[cfg(unix)]
254297
/// Sets the environment variable `n` to the value `v` for the currently running
255298
/// process
299+
///
300+
/// # Failure
301+
///
302+
/// Fails if `n` or `v` have any interior NULs.
256303
pub fn setenv(n: &str, v: &str) {
257304
unsafe {
258305
with_env_lock(|| {
@@ -283,6 +330,10 @@ pub fn setenv(n: &str, v: &str) {
283330
}
284331

285332
/// Remove a variable from the environment entirely
333+
///
334+
/// # Failure
335+
///
336+
/// Fails (on unix) if `n` has any interior NULs.
286337
pub fn unsetenv(n: &str) {
287338
#[cfg(unix)]
288339
fn _unsetenv(n: &str) {
@@ -722,10 +773,12 @@ pub fn get_exit_status() -> int {
722773
}
723774

724775
#[cfg(target_os = "macos")]
725-
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
776+
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] {
777+
use c_str::CString;
778+
726779
let mut args = ~[];
727780
for i in range(0u, argc as uint) {
728-
args.push(str::raw::from_c_str(*argv.offset(i as int)));
781+
args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned())
729782
}
730783
args
731784
}
@@ -736,7 +789,7 @@ unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
736789
* Returns a list of the command line arguments.
737790
*/
738791
#[cfg(target_os = "macos")]
739-
fn real_args() -> ~[~str] {
792+
fn real_args_as_bytes() -> ~[~[u8]] {
740793
unsafe {
741794
let (argc, argv) = (*_NSGetArgc() as int,
742795
*_NSGetArgv() as **c_char);
@@ -747,7 +800,7 @@ fn real_args() -> ~[~str] {
747800
#[cfg(target_os = "linux")]
748801
#[cfg(target_os = "android")]
749802
#[cfg(target_os = "freebsd")]
750-
fn real_args() -> ~[~str] {
803+
fn real_args_as_bytes() -> ~[~[u8]] {
751804
use rt;
752805

753806
match rt::args::clone() {
@@ -756,6 +809,11 @@ fn real_args() -> ~[~str] {
756809
}
757810
}
758811

812+
#[cfg(not(windows))]
813+
fn real_args() -> ~[~str] {
814+
real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect()
815+
}
816+
759817
#[cfg(windows)]
760818
fn real_args() -> ~[~str] {
761819
use vec;
@@ -786,6 +844,11 @@ fn real_args() -> ~[~str] {
786844
return args;
787845
}
788846

847+
#[cfg(windows)]
848+
fn real_args_as_bytes() -> ~[~[u8]] {
849+
real_args().move_iter().map(|s| s.into_bytes()).collect()
850+
}
851+
789852
type LPCWSTR = *u16;
790853

791854
#[cfg(windows)]
@@ -803,10 +866,19 @@ extern "system" {
803866

804867
/// Returns the arguments which this program was started with (normally passed
805868
/// via the command line).
869+
///
870+
/// The arguments are interpreted as utf-8, with invalid bytes replaced with \uFFFD.
871+
/// See `str::from_utf8_lossy` for details.
806872
pub fn args() -> ~[~str] {
807873
real_args()
808874
}
809875

876+
/// Returns the arguments which this program was started with (normally passed
877+
/// via the command line) as byte vectors.
878+
pub fn args_as_bytes() -> ~[~[u8]] {
879+
real_args_as_bytes()
880+
}
881+
810882
#[cfg(target_os = "macos")]
811883
extern {
812884
// These functions are in crt_externs.h.

src/libstd/path/mod.rs

+1-2
Original file line number | Diff line number | Diff line change
@@ -578,8 +578,7 @@ impl BytesContainer for ~[u8] {
578578
impl BytesContainer for CString {
579579
#[inline]
580580
fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
581-
let s = self.as_bytes();
582-
s.slice_to(s.len()-1)
581+
self.as_bytes_no_nul()
583582
}
584583
}
585584

0 commit comments

Comments
 (0)