diff --git a/src/compiletest/procsrv.rs b/src/compiletest/procsrv.rs index f86ab2c90931..f25f2c984c65 100644 --- a/src/compiletest/procsrv.rs +++ b/src/compiletest/procsrv.rs @@ -61,11 +61,11 @@ pub fn run(lib_path: &str, for input.each |input| { proc.input().write_str(*input); } - let output = proc.finish_with_output(); + let run::ProcessOutput { status, output, error, _ } = proc.finish_with_output(); Result { - status: output.status, - out: str::from_bytes(output.output), - err: str::from_bytes(output.error) + status: status, + out: str::from_bytes(output), + err: str::from_bytes(error) } } diff --git a/src/libextra/ebml.rs b/src/libextra/ebml.rs index 233c80426400..9322fa82380f 100644 --- a/src/libextra/ebml.rs +++ b/src/libextra/ebml.rs @@ -725,7 +725,7 @@ pub mod writer { } pub fn wr_tagged_str(&mut self, tag_id: uint, v: &str) { - str::byte_slice(v, |b| self.wr_tagged_bytes(tag_id, b)); + self.wr_tagged_bytes(tag_id, str::as_bytes_slice(v)); } pub fn wr_bytes(&mut self, b: &[u8]) { diff --git a/src/libextra/net_tcp.rs b/src/libextra/net_tcp.rs index 87ebfdfb7976..0b598be62814 100644 --- a/src/libextra/net_tcp.rs +++ b/src/libextra/net_tcp.rs @@ -1810,11 +1810,9 @@ mod test { fn buf_write(w: &W, val: &str) { debug!("BUF_WRITE: val len %?", str::len(val)); - do str::byte_slice(val) |b_slice| { - debug!("BUF_WRITE: b_slice len %?", - b_slice.len()); - w.write(b_slice) - } + let b_slice = str::as_bytes_slice(val); + debug!("BUF_WRITE: b_slice len %?", b_slice.len()); + w.write(b_slice) } fn buf_read(r: &R, len: uint) -> ~str { diff --git a/src/libfuzzer/fuzzer.rc b/src/libfuzzer/fuzzer.rc index 4e285e686df5..d7519b5c2cf3 100644 --- a/src/libfuzzer/fuzzer.rc +++ b/src/libfuzzer/fuzzer.rc @@ -433,8 +433,12 @@ pub fn check_running(exe_filename: &Path) -> happiness { let p = run::process_output( "/Users/jruderman/scripts/timed_run_rust_program.py", [exe_filename.to_str()]); - let comb = str::from_bytes(p.output) + "\n" + str::from_bytes(p.error); - if str::len(comb) > 1u { + + let out = str::from_bytes_slice(p.output); + let err = str::from_bytes_slice(p.error); + + let comb = fmt!("%s\n%s", out, err); + if comb.len() > 1u { error!("comb comb comb: %?", comb); } @@ -473,8 +477,8 @@ pub fn check_compiling(filename: &Path) -> happiness { "/Users/jruderman/code/rust/build/x86_64-apple-darwin/stage1/bin/rustc", [filename.to_str()]); - let out = str::from_bytes(p.output); - let err = str::from_bytes(p.error); + let out = str::from_bytes_slice(p.output); + let err = str::from_bytes_slice(p.error); //error!("Status: %d", p.status); if p.status == 0 { diff --git a/src/librustc/metadata/decoder.rs b/src/librustc/metadata/decoder.rs index 81c1560f18bd..6bab2f7f65f6 100644 --- a/src/librustc/metadata/decoder.rs +++ b/src/librustc/metadata/decoder.rs @@ -941,7 +941,7 @@ fn read_path(d: ebml::Doc) -> (~str, uint) { do reader::with_doc_data(d) |desc| { let pos = io::u64_from_be_bytes(desc, 0u, 4u) as uint; let pathbytes = desc.slice(4u, desc.len()); - let path = str::from_bytes(pathbytes); + let path = str::from_bytes_slice(pathbytes).to_owned(); (path, pos) } diff --git a/src/librustc/metadata/tydecode.rs b/src/librustc/metadata/tydecode.rs index cf2a92b291f2..2a8b5bc3fb8c 100644 --- a/src/librustc/metadata/tydecode.rs +++ b/src/librustc/metadata/tydecode.rs @@ -89,7 +89,7 @@ fn scan(st: &mut PState, is_last: &fn(char) -> bool, } let end_pos = st.pos; st.pos += 1; - return op(st.data.slice(start_pos, end_pos)); + op(st.data.slice(start_pos, end_pos)) } pub fn parse_ident(st: &mut PState, last: char) -> ast::ident { @@ -97,10 +97,10 @@ pub fn parse_ident(st: &mut PState, last: char) -> ast::ident { return parse_ident_(st, |a| is_last(last, a) ); } -fn parse_ident_(st: &mut PState, is_last: @fn(char) -> bool) -> - ast::ident { - let rslt = scan(st, is_last, str::from_bytes); - return st.tcx.sess.ident_of(rslt); +fn parse_ident_(st: &mut PState, is_last: @fn(char) -> bool) -> ast::ident { + do scan(st, is_last) |v| { + st.tcx.sess.ident_of(str::from_bytes_slice(v)) + } } pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: int, @@ -409,7 +409,9 @@ fn parse_mt(st: &mut PState, conv: conv_did) -> ty::mt { fn parse_def(st: &mut PState, source: DefIdSource, conv: conv_did) -> ast::def_id { - return conv(source, scan(st, |c| { c == '|' }, parse_def_id)); + do scan(st, |c| { c == '|' }) |v| { + conv(source, parse_def_id(v)) + } } fn parse_uint(st: &mut PState) -> uint { @@ -450,9 +452,10 @@ fn parse_abi_set(st: &mut PState) -> AbiSet { assert_eq!(next(st), '['); let mut abis = AbiSet::empty(); while peek(st) != ']' { - // FIXME(#5422) str API should not force this copy - let abi_str = scan(st, |c| c == ',', str::from_bytes); - let abi = abi::lookup(abi_str).expect(abi_str); + let abi = do scan(st, |c| c == ',') |v| { + let abi_str = str::from_bytes_slice(v); + abi::lookup(abi_str).expect(abi_str) + }; abis.add(abi); } assert_eq!(next(st), ']'); diff --git a/src/librustc/util/ppaux.rs b/src/librustc/util/ppaux.rs index d4f1e42d5ff8..1ede5fddf51f 100644 --- a/src/librustc/util/ppaux.rs +++ b/src/librustc/util/ppaux.rs @@ -432,12 +432,9 @@ pub fn ty_to_str(cx: ctxt, typ: t) -> ~str { ty_err => ~"[type error]", ty_param(param_ty {idx: id, def_id: did}) => { if cx.sess.verbose() { - fmt!("'%s:%?", - str::from_bytes([('a' as u8) + (id as u8)]), - did) + fmt!("'%s:%?", str::from_byte(('a' as u8) + (id as u8)), did) } else { - fmt!("'%s", - str::from_bytes([('a' as u8) + (id as u8)])) + fmt!("'%s", str::from_byte(('a' as u8) + (id as u8))) } } ty_self(*) => ~"Self", diff --git a/src/librustdoc/markdown_writer.rs b/src/librustdoc/markdown_writer.rs index 7bcfa1acdfad..5245126314dc 100644 --- a/src/librustdoc/markdown_writer.rs +++ b/src/librustdoc/markdown_writer.rs @@ -116,8 +116,8 @@ fn pandoc_writer( debug!("pandoc result: %i", output.status); if output.status != 0 { - error!("pandoc-out: %s", str::from_bytes(output.output)); - error!("pandoc-err: %s", str::from_bytes(output.error)); + error!("pandoc-out: %s", str::from_bytes_slice(output.output)); + error!("pandoc-err: %s", str::from_bytes_slice(output.error)); fail!("pandoc failed"); } } diff --git a/src/librustpkg/rustpkg.rc b/src/librustpkg/rustpkg.rc index 37b8c2ad4338..745bfb91fa56 100644 --- a/src/librustpkg/rustpkg.rc +++ b/src/librustpkg/rustpkg.rc @@ -162,7 +162,7 @@ impl<'self> PkgScript<'self> { let output = run::process_output(exe.to_str(), [root.to_str(), ~"configs"]); // Run the configs() function to get the configs let mut cfgs = ~[]; - for str::each_word(str::from_bytes(output.output)) |w| { + for str::each_word(str::from_bytes_slice(output.output)) |w| { cfgs.push(w.to_owned()); } (cfgs, output.status) diff --git a/src/librustpkg/version.rs b/src/librustpkg/version.rs index 92906f5af543..574c58d7fafb 100644 --- a/src/librustpkg/version.rs +++ b/src/librustpkg/version.rs @@ -103,14 +103,14 @@ pub fn try_getting_version(remote_path: &RemotePath) -> Option { tmp_dir.to_str()]); if outp.status == 0 { debug!("Cloned it... ( %s, %s )", - str::from_bytes(outp.output), - str::from_bytes(outp.error)); + str::from_bytes_slice(outp.output), + str::from_bytes_slice(outp.error)); let mut output = None; debug!("executing {git --git-dir=%s tag -l}", tmp_dir.push(".git").to_str()); let outp = run::process_output("git", [fmt!("--git-dir=%s", tmp_dir.push(".git").to_str()), ~"tag", ~"-l"]); - let output_text = str::from_bytes(outp.output); + let output_text = str::from_bytes_slice(outp.output); debug!("Full output: ( %s ) [%?]", output_text, outp.status); for output_text.each_split_char('\n') |l| { debug!("A line of output: %s", l); @@ -212,4 +212,4 @@ fn test_split_version() { let s = "a#1.2"; assert!(split_version(s) == Some((s.slice(0, 1), ExactRevision(~"1.2")))); assert!(split_version("a#a#3.4") == None); -} \ No newline at end of file +} diff --git a/src/libstd/io.rs b/src/libstd/io.rs index e3977ca00674..5f3465aeb494 100644 --- a/src/libstd/io.rs +++ b/src/libstd/io.rs @@ -1091,7 +1091,7 @@ pub fn with_bytes_reader(bytes: &[u8], f: &fn(@Reader) -> T) -> T { } pub fn with_str_reader(s: &str, f: &fn(@Reader) -> T) -> T { - str::byte_slice(s, |bytes| with_bytes_reader(bytes, f)) + with_bytes_reader(str::as_bytes_slice(s), f) } // Writing @@ -1462,7 +1462,7 @@ impl WriterUtil for T { self.write_str(str::from_char(ch)); } } - fn write_str(&self, s: &str) { str::byte_slice(s, |v| self.write(v)) } + fn write_str(&self, s: &str) { self.write(str::as_bytes_slice(s)) } fn write_line(&self, s: &str) { self.write_str(s); self.write_str(&"\n"); diff --git a/src/libstd/option.rs b/src/libstd/option.rs index 2386a779235d..5904b4b4bea7 100644 --- a/src/libstd/option.rs +++ b/src/libstd/option.rs @@ -376,90 +376,98 @@ impl Option { } } -#[test] -fn test_unwrap_ptr() { - unsafe { - let x = ~0; - let addr_x: *int = ::cast::transmute(&*x); +#[cfg(test)] +mod tests { + use super::*; + use std::str; + use std::util; + + #[test] + fn test_unwrap_ptr() { + unsafe { + let x = ~0; + let addr_x: *int = ::cast::transmute(&*x); + let opt = Some(x); + let y = opt.unwrap(); + let addr_y: *int = ::cast::transmute(&*y); + assert_eq!(addr_x, addr_y); + } + } + + #[test] + fn test_unwrap_str() { + let x = ~"test"; + let addr_x = str::as_buf(x, |buf, _len| buf); let opt = Some(x); let y = opt.unwrap(); - let addr_y: *int = ::cast::transmute(&*y); + let addr_y = str::as_buf(y, |buf, _len| buf); assert_eq!(addr_x, addr_y); } -} - -#[test] -fn test_unwrap_str() { - let x = ~"test"; - let addr_x = str::as_buf(x, |buf, _len| buf); - let opt = Some(x); - let y = opt.unwrap(); - let addr_y = str::as_buf(y, |buf, _len| buf); - assert_eq!(addr_x, addr_y); -} -#[test] -fn test_unwrap_resource() { - struct R { - i: @mut int, - } - - #[unsafe_destructor] - impl ::ops::Drop for R { - fn finalize(&self) { *(self.i) += 1; } - } + #[test] + fn test_unwrap_resource() { + struct R { + i: @mut int, + } - fn R(i: @mut int) -> R { - R { - i: i + #[unsafe_destructor] + impl ::ops::Drop for R { + fn finalize(&self) { *(self.i) += 1; } } - } - let i = @mut 0; - { - let x = R(i); - let opt = Some(x); - let _y = opt.unwrap(); - } - assert_eq!(*i, 1); -} + fn R(i: @mut int) -> R { + R { + i: i + } + } -#[test] -fn test_option_dance() { - let x = Some(()); - let mut y = Some(5); - let mut y2 = 0; - for x.each |_x| { - y2 = y.swap_unwrap(); + let i = @mut 0; + { + let x = R(i); + let opt = Some(x); + let _y = opt.unwrap(); + } + assert_eq!(*i, 1); } - assert_eq!(y2, 5); - assert!(y.is_none()); -} -#[test] #[should_fail] #[ignore(cfg(windows))] -fn test_option_too_much_dance() { - let mut y = Some(util::NonCopyable::new()); - let _y2 = y.swap_unwrap(); - let _y3 = y.swap_unwrap(); -} -#[test] -fn test_option_while_some() { - let mut i = 0; - do Some(10).while_some |j| { - i += 1; - if (j > 0) { - Some(j-1) - } else { - None + #[test] + fn test_option_dance() { + let x = Some(()); + let mut y = Some(5); + let mut y2 = 0; + for x.each |_x| { + y2 = y.swap_unwrap(); + } + assert_eq!(y2, 5); + assert!(y.is_none()); + } + + #[test] #[should_fail] #[ignore(cfg(windows))] + fn test_option_too_much_dance() { + let mut y = Some(util::NonCopyable::new()); + let _y2 = y.swap_unwrap(); + let _y3 = y.swap_unwrap(); + } + + #[test] + fn test_option_while_some() { + let mut i = 0; + do Some(10).while_some |j| { + i += 1; + if (j > 0) { + Some(j-1) + } else { + None + } } + assert_eq!(i, 11); } - assert_eq!(i, 11); -} -#[test] -fn test_get_or_zero() { - let some_stuff = Some(42); - assert_eq!(some_stuff.get_or_zero(), 42); - let no_stuff: Option = None; - assert_eq!(no_stuff.get_or_zero(), 0); + #[test] + fn test_get_or_zero() { + let some_stuff = Some(42); + assert_eq!(some_stuff.get_or_zero(), 42); + let no_stuff: Option = None; + assert_eq!(no_stuff.get_or_zero(), 0); + } } diff --git a/src/libstd/rt/io/flate.rs b/src/libstd/rt/io/flate.rs index 0a9e0b1e38f2..833e6731b8fb 100644 --- a/src/libstd/rt/io/flate.rs +++ b/src/libstd/rt/io/flate.rs @@ -117,7 +117,7 @@ mod test { let mut out_bytes = [0, .. 100]; let bytes_read = inflate_reader.read(out_bytes).get(); assert_eq!(bytes_read, in_bytes.len()); - let out_msg = str::from_bytes(out_bytes); + let out_msg = str::from_bytes_slice(out_bytes); assert!(in_msg == out_msg); } } diff --git a/src/libstd/rt/uv/mod.rs b/src/libstd/rt/uv/mod.rs index 10c8b84bc512..aa18e647d483 100644 --- a/src/libstd/rt/uv/mod.rs +++ b/src/libstd/rt/uv/mod.rs @@ -241,17 +241,9 @@ pub fn uv_error_to_io_error(uverr: UvError) -> IoError { // XXX: Could go in str::raw unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str { - let s = s as *u8; - let mut (curr, len) = (s, 0u); - while *curr != 0u8 { - len += 1u; - curr = ptr::offset(s, len); - } - - str::raw::buf_as_slice(s, len, |d| cast::transmute(d)) + str::raw::c_str_as_slice(s, |d| cast::transmute(d)) } - unsafe { // Importing error constants use rt::uv::uvll::*; diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 2af300fc1b88..b813a4a8c21c 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -31,6 +31,7 @@ use option::{None, Option, Some}; use old_iter::{BaseIter, EqIter}; use ptr; use ptr::RawPtr; +use result::{Result, Ok, Err}; use str; use to_str::ToStr; use uint; @@ -42,8 +43,37 @@ use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector}; /* Section: Conditions */ +// Raised by `from_bytes` on non-UTF-8 input condition! { - not_utf8: (~str) -> ~str; + // FIXME (#6009): uncomment `pub` after expansion support lands. + /*pub*/ not_utf8: (&'static str, uint) -> ~str; +} + +// Raised by `from_bytes_with_null` on input that is not NULL terminated. +condition! { + // FIXME (#6009): uncomment `pub` after expansion support lands. + /*pub*/ not_null_terminated: &'static str -> ~str; +} + +fn check_utf8(v: &[u8]) -> Result<(), ~str> { + use str::not_utf8::cond; + + if is_utf8(v) { + Ok(()) + } else { + let first_bad_byte = v.position(|b| !is_utf8([*b])).unwrap(); + Err(cond.raise(("input is not UTF-8", first_bad_byte))) + } +} + +fn check_null_terminated(v: &[u8]) -> Result<(), ~str> { + use str::not_null_terminated::cond; + + if v[v.len() - 1] == 0 { + Ok(()) + } else { + Err(cond.raise("input is not NULL terminated")) + } } /* @@ -57,35 +87,33 @@ Section: Creating a string * * Raises the `not_utf8` condition if invalid UTF-8 */ - -pub fn from_bytes(vv: &[u8]) -> ~str { - use str::not_utf8::cond; - - if !is_utf8(vv) { - let first_bad_byte = vec::find(vv, |b| !is_utf8([*b])).get(); - cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u", - first_bad_byte as uint)) - } - else { - return unsafe { raw::from_bytes(vv) } +pub fn from_bytes(v: ~[u8]) -> ~str { + match check_utf8(v) { + Ok(()) => unsafe { raw::from_bytes(v) }, + Err(s) => s, } } /** - * Convert a vector of bytes to a UTF-8 string. - * The vector needs to be one byte longer than the string, and end with a 0 byte. - * - * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str. + * Convert a vector of bytes to a UTF-8 string without performing any + * allocations. The vector needs to be one byte longer than the string, and end + * with a 0 byte. * * # Failure * - * Fails if invalid UTF-8 - * Fails if not null terminated + * Raises the `not_utf8` condition if invalid UTF-8 + * Raises the `not_null_terminated` condition if not NULL terminated */ -pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str { - assert_eq!(vv[vv.len() - 1], 0); - assert!(is_utf8(vv)); - return unsafe { raw::from_bytes_with_null(vv) }; +pub fn from_bytes_with_null(v: ~[u8]) -> ~str { + match check_null_terminated(v) { + Ok(()) => { + match check_utf8(v) { + Ok(()) => unsafe { raw::from_bytes_with_null(v) }, + Err(s) => s, + } + } + Err(s) => s, + } } /** @@ -98,15 +126,32 @@ pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str { * * Fails if invalid UTF-8 */ -pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str { +pub fn from_bytes_slice<'a>(v: &'a [u8]) -> &'a str { + assert!(is_utf8(v)); unsafe { - assert!(is_utf8(vector)); - let (ptr, len): (*u8, uint) = ::cast::transmute(vector); - let string: &'a str = ::cast::transmute((ptr, len + 1)); - string + let (ptr, len): (*u8, uint) = ::cast::transmute(v); + let s: &'a str = ::cast::transmute((ptr, len + 1)); + s } } +/** + * Convert a vector of bytes to a UTF-8 string. + * The vector needs to be one byte longer than the string, and end with a 0 byte. + * + * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str. + * + * # Failure + * + * Fails if not NULL terminated + * Fails if invalid UTF-8 + */ +pub fn from_bytes_slice_with_null<'a>(v: &'a [u8]) -> &'a str { + assert_eq!(v[v.len() - 1], 0); + assert!(is_utf8(v)); + unsafe { raw::from_bytes_slice_with_null(v) } +} + /// Copy a slice into a new unique str #[inline(always)] pub fn to_owned(s: &str) -> ~str { @@ -589,23 +634,6 @@ pub fn to_bytes(s: &str) -> ~[u8] { } } -/// Work with the string as a byte slice, not including trailing null. -#[inline(always)] -pub fn byte_slice(s: &str, f: &fn(v: &[u8]) -> T) -> T { - do as_buf(s) |p,n| { - unsafe { vec::raw::buf_as_slice(p, n-1u, f) } - } -} - -/// Work with the string as a byte slice, not including trailing null, without -/// a callback. -#[inline(always)] -pub fn byte_slice_no_callback<'a>(s: &'a str) -> &'a [u8] { - unsafe { - cast::transmute(s) - } -} - /// Convert a string to a unique vector of characters pub fn to_chars(s: &str) -> ~[char] { s.iter().collect() @@ -2122,7 +2150,9 @@ static tag_six_b: uint = 252u; * # Example * * ~~~ {.rust} - * let i = str::as_bytes("Hello World") { |bytes| bytes.len() }; + * let i = str::as_bytes(&~"Hello World") { |bytes| bytes.len() }; + * + * assert_eq!(i, 12); * ~~~ */ #[inline] @@ -2137,6 +2167,14 @@ pub fn as_bytes(s: &const ~str, f: &fn(&~[u8]) -> T) -> T { * Work with the byte buffer of a string as a byte slice. * * The byte slice does not include the null terminator. + * + * # Example + * + * ~~~ {.rust} + * let v = str::as_bytes_slice("Hello World"); + * + * assert_eq!(v, &[72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100]) + * ~~~ */ pub fn as_bytes_slice<'a>(s: &'a str) -> &'a [u8] { unsafe { @@ -2332,19 +2370,25 @@ pub mod raw { use str::{as_buf, is_utf8, len, reserve_at_least}; use vec; - /// Create a Rust string from a null-terminated *u8 buffer - pub unsafe fn from_buf(buf: *u8) -> ~str { - let mut (curr, i) = (buf, 0u); - while *curr != 0u8 { - i += 1u; + /// Count the number of bytes in a null-terminated *u8 buffer + pub unsafe fn buf_len(buf: *u8) -> uint { + let mut curr = buf; + let mut i = 0; + while *curr != 0 { + i += 1; curr = ptr::offset(buf, i); } - return from_buf_len(buf, i); + i + } + + /// Create a Rust string from a null-terminated *u8 buffer + pub unsafe fn from_buf(buf: *u8) -> ~str { + from_buf_len(buf, buf_len(buf)) } /// Create a Rust string from a *u8 buffer of the given length pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> ~str { - let mut v: ~[u8] = vec::with_capacity(len + 1); + let mut v = vec::with_capacity(len + 1); vec::as_mut_buf(v, |vbuf, _len| { ptr::copy_memory(vbuf, buf as *u8, len) }); @@ -2366,30 +2410,51 @@ pub mod raw { } /// Converts a vector of bytes to a new owned string. - pub unsafe fn from_bytes(v: &const [u8]) -> ~str { - do vec::as_const_buf(v) |buf, len| { - from_buf_len(buf, len) - } + pub unsafe fn from_bytes(mut v: ~[u8]) -> ~str { + // Make sure the string is NULL terminated. + v.push(0); + from_bytes_with_null(v) + } + + /// Converts a vector of bytes to a new owned string. + pub unsafe fn from_bytes_with_null(v: ~[u8]) -> ~str { + cast::transmute(v) + } + + /// Converts a vector of bytes to a string slice. + /// The byte slice needs to contain valid utf8. + pub unsafe fn from_bytes_slice<'a>(v: &'a [u8]) -> &'a str { + let (ptr, len): (*u8, uint) = ::cast::transmute(v); + cast::transmute((ptr, len + 1)) } /// Converts a vector of bytes to a string. /// The byte slice needs to contain valid utf8 and needs to be one byte longer than /// the string, if possible ending in a 0 byte. - pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str { + pub unsafe fn from_bytes_slice_with_null<'a>(v: &'a [u8]) -> &'a str { cast::transmute(v) } /// Converts a byte to a string. - pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) } + pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes_with_null(~[u, 0]) } + + /// Form a slice from a null terminated *u8 buffer without copying. + pub unsafe fn buf_as_slice(buf: *u8, f: &fn(v: &str) -> T) -> T { + buf_len_as_slice(buf, buf_len(buf), f) + } /// Form a slice from a *u8 buffer of the given length without copying. - pub unsafe fn buf_as_slice(buf: *u8, len: uint, - f: &fn(v: &str) -> T) -> T { + pub unsafe fn buf_len_as_slice(buf: *u8, len: uint, f: &fn(v: &str) -> T) -> T { let v = (buf, len + 1); assert!(is_utf8(::cast::transmute(v))); f(::cast::transmute(v)) } + /// Form a slice from a *u8 buffer of the given length without copying. + pub unsafe fn c_str_as_slice(buf: *libc::c_char, f: &fn(v: &str) -> T) -> T { + buf_as_slice(cast::transmute(buf), f) + } + /** * Takes a bytewise (not UTF-8) slice from a string. * @@ -2816,18 +2881,19 @@ impl<'self> Iterator for StrBytesRevIterator<'self> { #[cfg(test)] mod tests { - use iterator::IteratorUtil; - use container::Container; + use super::*; use char; - use option::Some; + use cmp::{TotalOrd, Less, Equal, Greater}; + use container::Container; + use iterator::IteratorUtil; use libc::c_char; use libc; use old_iter::BaseIter; + use option::Some; use ptr; - use str::*; - use vec; + use uint; use vec::ImmutableVector; - use cmp::{TotalOrd, Less, Equal, Greater}; + use vec; #[test] fn test_eq() { @@ -3499,6 +3565,137 @@ mod tests { fn test_from_bytes() { let ss = ~"ศไทย中华Việt Nam"; let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8]; + + assert_eq!(ss, from_bytes(bb)); + } + + #[test] + #[ignore(cfg(windows))] + fn test_from_bytes_fail() { + use str::not_utf8::cond; + + let mut error_happened = false; + let _x = do cond.trap(|(err, pos)| { + assert_eq!(err, "input is not UTF-8"); + assert_eq!(pos, 0); + error_happened = true; + ~"" + }).in { + let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8]; + + from_bytes(bb) + }; + assert!(error_happened); + } + + #[test] + fn test_unsafe_from_bytes_with_null() { + let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8]; + let b = unsafe { raw::from_bytes_with_null(a) }; + assert_eq!(b, ~"AAAAAAA"); + } + + #[test] + fn test_from_bytes_with_null() { + let ss = ~"ศไทย中华Việt Nam"; + let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8, 0x0_u8]; + + assert_eq!(ss, from_bytes_with_null(bb)); + } + + #[test] + #[ignore(cfg(windows))] + fn test_from_bytes_with_null_fail() { + use str::not_utf8::cond; + + let mut error_happened = false; + let _x = do cond.trap(|(err, pos)| { + assert_eq!(err, "input is not UTF-8"); + assert_eq!(pos, 0); + error_happened = true; + ~"" + }).in { + let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8, 0x0_u8]; + + from_bytes_with_null(bb) + }; + assert!(error_happened); + } + + #[test] + #[ignore(cfg(windows))] + fn test_from_bytes_with_null_fail_2() { + use str::not_null_terminated::cond; + + let mut error_happened = false; + let _x = do cond.trap(|err| { + assert_eq!(err, "input is not NULL terminated"); + error_happened = true; + ~"" + }).in { + let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8, + 0xe0_u8, 0xb9_u8, 0x84_u8, + 0xe0_u8, 0xb8_u8, 0x97_u8, + 0xe0_u8, 0xb8_u8, 0xa2_u8, + 0xe4_u8, 0xb8_u8, 0xad_u8, + 0xe5_u8, 0x8d_u8, 0x8e_u8, + 0x56_u8, 0x69_u8, 0xe1_u8, + 0xbb_u8, 0x87_u8, 0x74_u8, + 0x20_u8, 0x4e_u8, 0x61_u8, + 0x6d_u8, 0x60_u8]; + + from_bytes_with_null(bb) + }; + assert!(error_happened); + } + + #[test] + fn test_unsafe_from_bytes_slice() { + let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8]; + let b = unsafe { raw::from_bytes_slice(a) }; + assert_eq!(b, "AAAAAAA"); + } + + #[test] + fn test_from_bytes_slice() { + let ss = "ศไทย中华Việt Nam"; + let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8, 0xe0_u8, 0xb9_u8, 0x84_u8, 0xe0_u8, 0xb8_u8, 0x97_u8, 0xe0_u8, 0xb8_u8, 0xa2_u8, @@ -3509,15 +3706,14 @@ mod tests { 0x20_u8, 0x4e_u8, 0x61_u8, 0x6d_u8]; - assert_eq!(ss, from_bytes(bb)); + assert_eq!(ss, from_bytes_slice(bb)); } #[test] + #[should_fail] #[ignore(cfg(windows))] - fn test_from_bytes_fail() { - use str::not_utf8::cond; - - let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8, + fn test_from_bytes_slice_fail() { + let bb = [0xff_u8, 0xb8_u8, 0xa8_u8, 0xe0_u8, 0xb9_u8, 0x84_u8, 0xe0_u8, 0xb8_u8, 0x97_u8, 0xe0_u8, 0xb8_u8, 0xa2_u8, @@ -3528,26 +3724,18 @@ mod tests { 0x20_u8, 0x4e_u8, 0x61_u8, 0x6d_u8]; - let mut error_happened = false; - let _x = do cond.trap(|err| { - assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255"); - error_happened = true; - ~"" - }).in { - from_bytes(bb) - }; - assert!(error_happened); + let _x = from_bytes_slice(bb); } #[test] - fn test_unsafe_from_bytes_with_null() { + fn test_unsafe_from_bytes_slice_with_null() { let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8]; - let b = unsafe { raw::from_bytes_with_null(a) }; + let b = unsafe { raw::from_bytes_slice_with_null(a) }; assert_eq!(b, "AAAAAAA"); } #[test] - fn test_from_bytes_with_null() { + fn test_from_bytes_slice_with_null() { let ss = "ศไทย中华Việt Nam"; let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8, 0xe0_u8, 0xb9_u8, 0x84_u8, @@ -3560,13 +3748,13 @@ mod tests { 0x20_u8, 0x4e_u8, 0x61_u8, 0x6d_u8, 0x0_u8]; - assert_eq!(ss, from_bytes_with_null(bb)); + assert_eq!(ss, from_bytes_slice_with_null(bb)); } #[test] #[should_fail] #[ignore(cfg(windows))] - fn test_from_bytes_with_null_fail() { + fn test_from_bytes_slice_with_null_fail() { let bb = [0xff_u8, 0xb8_u8, 0xa8_u8, 0xe0_u8, 0xb9_u8, 0x84_u8, 0xe0_u8, 0xb8_u8, 0x97_u8, @@ -3578,13 +3766,13 @@ mod tests { 0x20_u8, 0x4e_u8, 0x61_u8, 0x6d_u8, 0x0_u8]; - let _x = from_bytes_with_null(bb); + let _x = from_bytes_slice_with_null(bb); } #[test] #[should_fail] #[ignore(cfg(windows))] - fn test_from_bytes_with_null_fail_2() { + fn test_from_bytes_slice_with_null_fail_2() { let bb = [0xff_u8, 0xb8_u8, 0xa8_u8, 0xe0_u8, 0xb9_u8, 0x84_u8, 0xe0_u8, 0xb8_u8, 0x97_u8, @@ -3596,7 +3784,7 @@ mod tests { 0x20_u8, 0x4e_u8, 0x61_u8, 0x6d_u8, 0x60_u8]; - let _x = from_bytes_with_null(bb); + let _x = from_bytes_slice_with_null(bb); } #[test] @@ -3687,21 +3875,22 @@ mod tests { #[test] fn vec_str_conversions() { - let s1: ~str = ~"All mimsy were the borogoves"; + let s1 = ~"All mimsy were the borogoves"; + let n1 = s1.len(); + + let v = to_bytes(s1); + let n2 = v.len(); - let v: ~[u8] = to_bytes(s1); - let s2: ~str = from_bytes(v); - let mut i: uint = 0u; - let n1: uint = len(s1); - let n2: uint = v.len(); assert_eq!(n1, n2); - while i < n1 { - let a: u8 = s1[i]; - let b: u8 = s2[i]; + + let s2 = from_bytes(v); + + for uint::range(0, n1) |i| { + let a = s1[i]; + let b = s2[i]; debug!(a); debug!(b); assert_eq!(a, b); - i += 1u; } } diff --git a/src/libstd/str/ascii.rs b/src/libstd/str/ascii.rs index e288d605714f..e8fb4a01c81a 100644 --- a/src/libstd/str/ascii.rs +++ b/src/libstd/str/ascii.rs @@ -64,7 +64,9 @@ impl Ascii { impl ToStr for Ascii { #[inline(always)] - fn to_str(&self) -> ~str { str::from_bytes(['\'' as u8, self.chr, '\'' as u8]) } + fn to_str(&self) -> ~str { + str::from_bytes_with_null(~['\'' as u8, self.chr, '\'' as u8, 0]) + } } /// Trait for converting into an ascii type. diff --git a/src/libstd/to_bytes.rs b/src/libstd/to_bytes.rs index 77e7583ebe53..3fc2d6595fed 100644 --- a/src/libstd/to_bytes.rs +++ b/src/libstd/to_bytes.rs @@ -239,27 +239,21 @@ impl IterBytes for @[A] { impl<'self> IterBytes for &'self str { #[inline(always)] fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool { - do str::byte_slice(*self) |bytes| { - f(bytes) - } + f(str::as_bytes_slice(*self)) } } impl IterBytes for ~str { #[inline(always)] fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool { - do str::byte_slice(*self) |bytes| { - f(bytes) - } + f(str::as_bytes_slice(*self)) } } impl IterBytes for @str { #[inline(always)] fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool { - do str::byte_slice(*self) |bytes| { - f(bytes) - } + f(str::as_bytes_slice(*self)) } } diff --git a/src/libstd/vec.rs b/src/libstd/vec.rs index 19233c533481..2d70f254a3c6 100644 --- a/src/libstd/vec.rs +++ b/src/libstd/vec.rs @@ -2404,13 +2404,13 @@ pub mod bytes { use vec; /// Bytewise string comparison - pub fn memcmp(a: &~[u8], b: &~[u8]) -> int { + pub fn memcmp(a: &[u8], b: &[u8]) -> int { let a_len = a.len(); let b_len = b.len(); let n = uint::min(a_len, b_len) as libc::size_t; let r = unsafe { - libc::memcmp(raw::to_ptr(*a) as *libc::c_void, - raw::to_ptr(*b) as *libc::c_void, n) as int + libc::memcmp(raw::to_ptr(a) as *libc::c_void, + raw::to_ptr(b) as *libc::c_void, n) as int }; if r != 0 { r } else { @@ -2425,22 +2425,22 @@ pub mod bytes { } /// Bytewise less than or equal - pub fn lt(a: &~[u8], b: &~[u8]) -> bool { memcmp(a, b) < 0 } + pub fn lt(a: &[u8], b: &[u8]) -> bool { memcmp(a, b) < 0 } /// Bytewise less than or equal - pub fn le(a: &~[u8], b: &~[u8]) -> bool { memcmp(a, b) <= 0 } + pub fn le(a: &[u8], b: &[u8]) -> bool { memcmp(a, b) <= 0 } /// Bytewise equality - pub fn eq(a: &~[u8], b: &~[u8]) -> bool { memcmp(a, b) == 0 } + pub fn eq(a: &[u8], b: &[u8]) -> bool { memcmp(a, b) == 0 } /// Bytewise inequality - pub fn ne(a: &~[u8], b: &~[u8]) -> bool { memcmp(a, b) != 0 } + pub fn ne(a: &[u8], b: &[u8]) -> bool { memcmp(a, b) != 0 } /// Bytewise greater than or equal - pub fn ge(a: &~[u8], b: &~[u8]) -> bool { memcmp(a, b) >= 0 } + pub fn ge(a: &[u8], b: &[u8]) -> bool { memcmp(a, b) >= 0 } /// Bytewise greater than - pub fn gt(a: &~[u8], b: &~[u8]) -> bool { memcmp(a, b) > 0 } + pub fn gt(a: &[u8], b: &[u8]) -> bool { memcmp(a, b) > 0 } /** * Copies data from one vector to another. diff --git a/src/test/bench/shootout-fasta-redux.rs b/src/test/bench/shootout-fasta-redux.rs index bc1685a10922..ecacdac6f5fc 100644 --- a/src/test/bench/shootout-fasta-redux.rs +++ b/src/test/bench/shootout-fasta-redux.rs @@ -90,10 +90,9 @@ impl RepeatFasta { fn make(&mut self, n: uint) { unsafe { - let stdout = self.stdout; let alu_len = self.alu.len(); let mut buf = vec::from_elem(alu_len + LINE_LEN, 0u8); - let alu: &[u8] = str::byte_slice_no_callback(self.alu); + let alu = str::as_bytes_slice(self.alu); copy_memory(buf, alu, alu_len); copy_memory(vec::mut_slice(buf, alu_len, buf.len()), @@ -105,8 +104,8 @@ impl RepeatFasta { let mut n = n; while n > 0 { bytes = min(LINE_LEN, n); - fwrite(transmute(&buf[pos]), bytes as size_t, 1, stdout); - fputc('\n' as c_int, stdout); + fwrite(transmute(&buf[pos]), bytes as size_t, 1, self.stdout); + fputc('\n' as c_int, self.stdout); pos += bytes; if pos > alu_len { pos -= alu_len; diff --git a/src/test/run-pass/const-str-ptr.rs b/src/test/run-pass/const-str-ptr.rs index 2f0cd3c611f6..2e6ccd08ce30 100644 --- a/src/test/run-pass/const-str-ptr.rs +++ b/src/test/run-pass/const-str-ptr.rs @@ -16,7 +16,7 @@ static b: *u8 = c as *u8; pub fn main() { let foo = &a as *u8; - assert_eq!(unsafe { str::raw::from_bytes(a) }, ~"hi\x00"); + assert_eq!(unsafe { str::raw::from_bytes_slice(a) }, "hi\x00"); assert_eq!(unsafe { str::raw::from_buf(foo) }, ~"hi"); assert_eq!(unsafe { str::raw::from_buf(b) }, ~"hi"); assert!(unsafe { *b == a[0] });