diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index ff6b1b1533324..dd0a14dfcdbc7 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -1515,7 +1515,7 @@ fn _arm_exec_compiled_test(config: &Config, let mut exitcode: int = 0; for c in exitcode_out.as_slice().chars() { - if !c.is_digit() { break; } + if !c.is_numeric() { break; } exitcode = exitcode * 10 + match c { '0' ... '9' => c as int - ('0' as int), _ => 101, diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index f49371b8e8862..bc08c4f034ca2 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -698,7 +698,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_default(|c| out.push(c)); + for c in c.escape_default() { + out.push(c); + } } out } @@ -708,7 +710,9 @@ pub trait StrAllocating: Str { let me = self.as_slice(); let mut out = String::with_capacity(me.len()); for c in me.chars() { - c.escape_unicode(|c| out.push(c)); + for c in c.escape_unicode() { + out.push(c); + } } out } @@ -1273,7 +1277,7 @@ mod tests { assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12"); - assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123"); + assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123"); } #[test] @@ -1288,7 +1292,7 @@ mod tests { assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar"); let chars: &[char] = &['1', '2']; assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar"); - assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar"); + assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar"); } #[test] @@ -1303,7 +1307,7 @@ mod tests { assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar"); let chars: &[char] = &['1', '2']; 
assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar"); - assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar"); + assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar"); } #[test] diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 061064ff803d5..8622267483e80 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -19,8 +19,6 @@ use core::fmt; use core::mem; use core::ptr; use core::ops; -// FIXME: ICE's abound if you import the `Slice` type while importing `Slice` trait -use core::raw::Slice as RawSlice; use {Mutable, MutableSeq}; use hash; @@ -540,12 +538,13 @@ impl String { unsafe { // Attempt to not use an intermediate buffer by just pushing bytes // directly onto this string. - let slice = RawSlice { - data: self.vec.as_ptr().offset(cur_len as int), - len: 4, - }; - let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0); - self.vec.set_len(cur_len + used); + let buf = self.vec.as_mut_ptr().offset(cur_len as int); + let mut used = 0; + for byte in ch.encode_utf8() { + *buf.offset(used) = byte; + used += 1; + } + self.vec.set_len(cur_len + (used as uint)); } } @@ -798,16 +797,15 @@ impl String { assert!(idx <= len); assert!(self.as_slice().is_char_boundary(idx)); self.vec.reserve_additional(4); - let mut bits = [0, ..4]; - let amt = ch.encode_utf8(bits).unwrap(); + let amt = ch.len_utf8(); unsafe { ptr::copy_memory(self.vec.as_mut_ptr().offset((idx + amt) as int), self.vec.as_ptr().offset(idx as int), len - idx); - ptr::copy_memory(self.vec.as_mut_ptr().offset(idx as int), - bits.as_ptr(), - amt); + for (i, byte) in ch.encode_utf8().enumerate() { + *self.vec.as_mut_ptr().offset((idx + i) as int) = byte + } self.vec.set_len(len + amt); } } diff --git a/src/libcore/char.rs b/src/libcore/char.rs index f507556909c8f..13522e3aa7ec3 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -15,9 +15,10 @@ #![allow(non_snake_case)] #![doc(primitive = "char")] +use 
clone::Clone; use mem::transmute; use option::{None, Option, Some}; -use iter::range_step; +use iter::{range_step, Iterator, RangeStep}; use collections::Collection; // UTF-8 ranges and tags for encoding characters @@ -63,10 +64,12 @@ static MAX_THREE_B: u32 = 0x10000u32; */ /// The highest valid code point +#[stable] pub const MAX: char = '\U0010ffff'; /// Converts from `u32` to a `char` #[inline] +#[unstable = "pending decisions about costructors for primitives"] pub fn from_u32(i: u32) -> Option { // catch out-of-bounds and surrogates if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { @@ -96,11 +99,9 @@ pub fn from_u32(i: u32) -> Option { /// This just wraps `to_digit()`. /// #[inline] +#[deprecated = "use the Char::is_digit method"] pub fn is_digit_radix(c: char, radix: uint) -> bool { - match to_digit(c, radix) { - Some(_) => true, - None => false, - } + c.is_digit(radix) } /// @@ -118,18 +119,9 @@ pub fn is_digit_radix(c: char, radix: uint) -> bool { /// Fails if given a `radix` outside the range `[0..36]`. /// #[inline] +#[deprecated = "use the Char::to_digit method"] pub fn to_digit(c: char, radix: uint) -> Option { - if radix > 36 { - fail!("to_digit: radix is too high (maximum 36)"); - } - let val = match c { - '0' ... '9' => c as uint - ('0' as uint), - 'a' ... 'z' => c as uint + 10u - ('a' as uint), - 'A' ... 'Z' => c as uint + 10u - ('A' as uint), - _ => return None, - }; - if val < radix { Some(val) } - else { None } + c.to_digit(radix) } /// @@ -145,6 +137,7 @@ pub fn to_digit(c: char, radix: uint) -> Option { /// Fails if given an `radix` > 36. 
/// #[inline] +#[unstable = "pending decisions about costructors for primitives"] pub fn from_digit(num: uint, radix: uint) -> Option { if radix > 36 { fail!("from_digit: radix is too high (maximum 36)"); @@ -171,23 +164,10 @@ pub fn from_digit(num: uint, radix: uint) -> Option { /// - chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN` /// - chars above 0x10000 get 8-digit escapes: `\\UNNNNNNNN` /// +#[deprecated = "use the Char::escape_unicode method"] pub fn escape_unicode(c: char, f: |char|) { - // avoid calling str::to_str_radix because we don't really need to allocate - // here. - f('\\'); - let pad = match () { - _ if c <= '\xff' => { f('x'); 2 } - _ if c <= '\uffff' => { f('u'); 4 } - _ => { f('U'); 8 } - }; - for offset in range_step::(4 * (pad - 1), -1, -4) { - let offset = offset as uint; - unsafe { - match ((c as i32) >> offset) & 0xf { - i @ 0 ... 9 => { f(transmute('0' as i32 + i)); } - i => { f(transmute('a' as i32 + (i - 10))); } - } - } + for char in c.escape_unicode() { + f(char); } } @@ -203,32 +183,22 @@ pub fn escape_unicode(c: char, f: |char|) { /// - Any other chars in the range [0x20,0x7e] are not escaped. /// - Any other chars are given hex Unicode escapes; see `escape_unicode`. /// +#[deprecated = "use the Char::escape_default method"] pub fn escape_default(c: char, f: |char|) { - match c { - '\t' => { f('\\'); f('t'); } - '\r' => { f('\\'); f('r'); } - '\n' => { f('\\'); f('n'); } - '\\' => { f('\\'); f('\\'); } - '\'' => { f('\\'); f('\''); } - '"' => { f('\\'); f('"'); } - '\x20' ... 
'\x7e' => { f(c); } - _ => c.escape_unicode(f), + for c in c.escape_default() { + f(c); } } /// Returns the amount of bytes this `char` would need if encoded in UTF-8 #[inline] +#[deprecated = "use the Char::len_utf8 method"] pub fn len_utf8_bytes(c: char) -> uint { - let code = c as u32; - match () { - _ if code < MAX_ONE_B => 1u, - _ if code < MAX_TWO_B => 2u, - _ if code < MAX_THREE_B => 3u, - _ => 4u, - } + c.len_utf8() } /// Basic `char` manipulations. +#[experimental = "trait organization may change"] pub trait Char { /// Checks if a `char` parses as a numeric digit in the given radix. /// @@ -243,7 +213,24 @@ pub trait Char { /// # Failure /// /// Fails if given a radix > 36. - fn is_digit_radix(&self, radix: uint) -> bool; + #[deprecated = "use is_digit"] + fn is_digit_radix(self, radix: uint) -> bool; + + /// Checks if a `char` parses as a numeric digit in the given radix. + /// + /// Compared to `is_numeric()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `self` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Failure + /// + /// Fails if given a radix > 36. + #[unstable = "pending error conventions"] + fn is_digit(self, radix: uint) -> bool; /// Converts a character to the corresponding digit. /// @@ -256,7 +243,8 @@ pub trait Char { /// # Failure /// /// Fails if given a radix outside the range [0..36]. - fn to_digit(&self, radix: uint) -> Option; + #[unstable = "pending error conventions, trait organization"] + fn to_digit(self, radix: uint) -> Option; /// Converts a number to the character representing it. /// @@ -268,8 +256,13 @@ pub trait Char { /// # Failure /// /// Fails if given a radix > 36. 
+ #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option; + /// Converts from `u32` to a `char` + #[deprecated = "use the char::from_u32 free function"] + fn from_u32(i: u32) -> Option; + /// Returns the hexadecimal Unicode escape of a character. /// /// The rules are as follows: @@ -277,7 +270,8 @@ pub trait Char { /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN` /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`. /// * Characters above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`. - fn escape_unicode(&self, f: |char|); + #[unstable = "pending error conventions, trait organization"] + fn escape_unicode(self) -> UnicodeEscapedChars; /// Returns a 'default' ASCII and C++11-like literal escape of a /// character. @@ -291,84 +285,290 @@ pub trait Char { /// escaped. /// * Any other chars in the range [0x20,0x7e] are not escaped. /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. - fn escape_default(&self, f: |char|); + #[unstable = "pending error conventions, trait organization"] + fn escape_default(self) -> DefaultEscapedChars; + + /// Returns the amount of bytes this character would need if encoded in + /// UTF-8. + #[deprecated = "use len_utf8"] + fn len_utf8_bytes(self) -> uint; /// Returns the amount of bytes this character would need if encoded in /// UTF-8. - fn len_utf8_bytes(&self) -> uint; + #[unstable = "pending trait organization"] + fn len_utf8(self) -> uint; + + /// Returns the amount of bytes this character would need if encoded in + /// UTF-16. + #[unstable = "pending trait organization"] + fn len_utf16(self) -> uint; /// Encodes this character as UTF-8 into the provided byte buffer, /// and then returns the number of bytes written. /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. 
- fn encode_utf8(&self, dst: &mut [u8]) -> Option; + #[unstable = "pending trait organization"] + fn encode_utf8(self) -> Utf8CodeUnits; /// Encodes this character as UTF-16 into the provided `u16` buffer, /// and then returns the number of `u16`s written. /// /// If the buffer is not large enough, nothing will be written into it /// and a `None` will be returned. - fn encode_utf16(&self, dst: &mut [u16]) -> Option; + #[unstable = "pending trait organization"] + fn encode_utf16(self) -> Utf16CodeUnits; } +#[experimental = "trait is experimental"] impl Char for char { - fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) } + #[deprecated = "use is_digit"] + fn is_digit_radix(self, radix: uint) -> bool { self.is_digit(radix) } + + #[unstable = "pending trait organization"] + fn is_digit(self, radix: uint) -> bool { + match self.to_digit(radix) { + Some(_) => true, + None => false, + } + } - fn to_digit(&self, radix: uint) -> Option { to_digit(*self, radix) } + #[unstable = "pending trait organization"] + fn to_digit(self, radix: uint) -> Option { + if radix > 36 { + fail!("to_digit: radix is too high (maximum 36)"); + } + let val = match self { + '0' ... '9' => self as uint - ('0' as uint), + 'a' ... 'z' => self as uint + 10u - ('a' as uint), + 'A' ... 
'Z' => self as uint + 10u - ('A' as uint), + _ => return None, + }; + if val < radix { Some(val) } + else { None } + } + #[deprecated = "use the char::from_digit free function"] fn from_digit(num: uint, radix: uint) -> Option { from_digit(num, radix) } - fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) } + #[inline] + #[deprecated = "use the char::from_u32 free function"] + fn from_u32(i: u32) -> Option { from_u32(i) } - fn escape_default(&self, f: |char|) { escape_default(*self, f) } + #[unstable = "pending error conventions, trait organization"] + fn escape_unicode(self) -> UnicodeEscapedChars { + UnicodeEscapedChars { c: self, state: EscapeBackslash } + } + + #[unstable = "pending error conventions, trait organization"] + fn escape_default(self) -> DefaultEscapedChars { + let init_state = match self { + '\t' => DefaultEscapeBackslash('t'), + '\r' => DefaultEscapeBackslash('r'), + '\n' => DefaultEscapeBackslash('n'), + '\\' => DefaultEscapeBackslash('\\'), + '\'' => DefaultEscapeBackslash('\''), + '"' => DefaultEscapeBackslash('"'), + '\x20' ... 
'\x7e' => DefaultEscapeChar(self), + _ => DefaultEscapeUnicode(self.escape_unicode()) + }; + DefaultEscapedChars { state: init_state } + } #[inline] - fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) } + #[deprecated = "use len_utf8"] + fn len_utf8_bytes(self) -> uint { self.len_utf8() } #[inline] - fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> Option { - // Marked #[inline] to allow llvm optimizing it away - let code = *self as u32; - if code < MAX_ONE_B && dst.len() >= 1 { - dst[0] = code as u8; - Some(1) - } else if code < MAX_TWO_B && dst.len() >= 2 { - dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B; - dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(2) - } else if code < MAX_THREE_B && dst.len() >= 3 { - dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B; - dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; - dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(3) - } else if dst.len() >= 4 { - dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B; - dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT; - dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT; - dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT; - Some(4) - } else { - None + #[unstable = "pending trait organization"] + fn len_utf8(self) -> uint { + let code = self as u32; + match () { + _ if code < MAX_ONE_B => 1u, + _ if code < MAX_TWO_B => 2u, + _ if code < MAX_THREE_B => 3u, + _ => 4u, } } #[inline] - fn encode_utf16(&self, dst: &mut [u16]) -> Option { + #[unstable = "pending trait organization"] + fn len_utf16(self) -> uint { + let ch = self as u32; + if (ch & 0xFFFF_u32) == ch { 1 } else { 2 } + } + + #[inline] + #[unstable = "pending error conventions, trait organization"] + fn encode_utf8(self) -> Utf8CodeUnits { + let code = self as u32; + let (len, buf) = if code < MAX_ONE_B { + (1, [code as u8, 0, 0, 0]) + } else if code < MAX_TWO_B { + (2, [(code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B, + (code & 0x3F_u32) as u8 | TAG_CONT, + 0, 0]) + } else if code < MAX_THREE_B { + (3, [(code >> 
12u & 0x0F_u32) as u8 | TAG_THREE_B, + (code >> 6u & 0x3F_u32) as u8 | TAG_CONT, + (code & 0x3F_u32) as u8 | TAG_CONT, + 0]) + } else { + (4, [(code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B, + (code >> 12u & 0x3F_u32) as u8 | TAG_CONT, + (code >> 6u & 0x3F_u32) as u8 | TAG_CONT, + (code & 0x3F_u32) as u8 | TAG_CONT]) + }; + + Utf8CodeUnits { pos: 0, len: len, buf: buf } + } + + #[inline] + #[unstable = "pending error conventions, trait organization"] + fn encode_utf16(self) -> Utf16CodeUnits { // Marked #[inline] to allow llvm optimizing it away - let mut ch = *self as u32; - if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 { + let mut ch = self as u32; + let (len, buf) = if (ch & 0xFFFF_u32) == ch { // The BMP falls through (assuming non-surrogate, as it should) - dst[0] = ch as u16; - Some(1) - } else if dst.len() >= 2 { + (1, [ch as u16, 0]) + } else { // Supplementary planes break into surrogates. ch -= 0x1_0000_u32; - dst[0] = 0xD800_u16 | ((ch >> 10) as u16); - dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); - Some(2) + (2, [0xD800_u16 | ((ch >> 10) as u16), + 0xDC00_u16 | ((ch as u16) & 0x3FF_u16)]) + }; + + Utf16CodeUnits { pos: 0, len: len, buf: buf } + } +} + +/// An iterator over the bytes of a char encoded as UTF-8 +#[unstable = "pending error conventions, trait organization"] +pub struct Utf8CodeUnits { + pos: uint, + len: uint, + buf: [u8, ..4] +} + +#[unstable = "struct is unstable"] +impl Iterator for Utf8CodeUnits { + #[inline] + fn next(&mut self) -> Option { + if self.pos != self.len { + let next = self.buf[self.pos]; + self.pos += 1; + Some(next) } else { None } } } + +#[unstable = "struct is unstable"] +impl Clone for Utf8CodeUnits { + fn clone(&self) -> Utf8CodeUnits { + Utf8CodeUnits { pos: self.pos, len: self.len, buf: self.buf } + } +} + +/// An iterator over the `u16` code units of a char encoded as UTF-16 +#[unstable = "pending error conventions, trait organization"] +pub struct Utf16CodeUnits { + pos: uint, + len: uint, + buf: [u16, ..2] +} + +#[unstable 
= "struct is unstable"] +impl Iterator for Utf16CodeUnits { + #[inline] + fn next(&mut self) -> Option { + if self.pos != self.len { + let next = self.buf[self.pos]; + self.pos += 1; + Some(next) + } else { + None + } + } +} + +#[unstable = "struct is unstable"] +impl Clone for Utf16CodeUnits { + fn clone(&self) -> Utf16CodeUnits { + Utf16CodeUnits { pos: self.pos, len: self.len, buf: self.buf } + } +} + +/// An iterator over the characters that represent a `char`, as escaped by +/// Rust's unicode escaping rules. +pub struct UnicodeEscapedChars { + c: char, + state: UnicodeEscapedCharsState +} + +enum UnicodeEscapedCharsState { + EscapeBackslash, + EscapeType, + EscapeValue(RangeStep), +} + +impl Iterator for UnicodeEscapedChars { + fn next(&mut self) -> Option { + match self.state { + EscapeBackslash => { + self.state = EscapeType; + Some('\\') + } + EscapeType => { + let (typechar, pad) = if self.c <= '\xff' { ('x', 2) } + else if self.c <= '\uffff' { ('u', 4) } + else { ('U', 8) }; + self.state = EscapeValue(range_step(4 * (pad - 1), -1, -4i32)); + Some(typechar) + } + EscapeValue(ref mut range_step) => match range_step.next() { + Some(offset) => { + let offset = offset as uint; + let v = match ((self.c as i32) >> offset) & 0xf { + i @ 0 ... 9 => '0' as i32 + i, + i => 'a' as i32 + (i - 10) + }; + Some(unsafe { transmute(v) }) + } + None => None + } + } + } +} + +/// An iterator over the characters that represent a `char`, escaped +/// for maximum portability. 
+pub struct DefaultEscapedChars { + state: DefaultEscapedCharsState +} + +enum DefaultEscapedCharsState { + DefaultEscapeBackslash(char), + DefaultEscapeChar(char), + DefaultEscapeDone, + DefaultEscapeUnicode(UnicodeEscapedChars), +} + +impl Iterator for DefaultEscapedChars { + fn next(&mut self) -> Option { + match self.state { + DefaultEscapeBackslash(c) => { + self.state = DefaultEscapeChar(c); + Some('\\') + } + DefaultEscapeChar(c) => { + self.state = DefaultEscapeDone; + Some(c) + } + DefaultEscapeDone => None, + DefaultEscapeUnicode(ref mut iter) => iter.next() + } + } +} + diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs index 343ab7cfd28b9..f86a0bbbf3ff6 100644 --- a/src/libcore/fmt/float.rs +++ b/src/libcore/fmt/float.rs @@ -11,6 +11,7 @@ #![allow(missing_doc)] use char; +use char::Char; use collections::Collection; use fmt; use iter::{range, DoubleEndedIterator}; @@ -220,7 +221,7 @@ pub fn float_to_str_bytes_common( // round the remaining ones. if limit_digits && dig == digit_count { let ascii2value = |chr: u8| { - char::to_digit(chr as char, radix).unwrap() + (chr as char).to_digit(radix).unwrap() }; let value2ascii = |val: uint| { char::from_digit(val, radix).unwrap() as u8 diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index 093f5896aad2d..5c441dfa888b3 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -421,9 +421,9 @@ impl<'a> Formatter<'a> { // Writes the sign if it exists, and then the prefix if it was requested let write_prefix = |f: &mut Formatter| { for c in sign.into_iter() { - let mut b = [0, ..4]; - let n = c.encode_utf8(b).unwrap_or(0); - try!(f.buf.write(b[..n])); + for byte in c.encode_utf8() { + try!(f.buf.write([byte])); + } } if prefixed { f.buf.write(prefix.as_bytes()) } else { Ok(()) } @@ -527,7 +527,11 @@ impl<'a> Formatter<'a> { }; let mut fill = [0u8, ..4]; - let len = self.fill.encode_utf8(fill).unwrap_or(0); + let mut len = 0; + for byte in self.fill.encode_utf8() { + fill[len] = 
byte; + len += 1; + } for _ in range(0, pre_pad) { try!(self.buf.write(fill[..len])); @@ -610,8 +614,12 @@ impl Char for char { use char::Char; let mut utf8 = [0u8, ..4]; - let amt = self.encode_utf8(utf8).unwrap_or(0); - let s: &str = unsafe { mem::transmute(utf8[..amt]) }; + let mut len = 0; + for byte in self.encode_utf8() { + utf8[len] = byte; + len += 1; + } + let s: &str = unsafe { mem::transmute(utf8[..len]) }; secret_string(&s, f) } } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index e8cd93ba7dc42..614b4bd22e0ad 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -724,27 +724,36 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> { /// External iterator for a string's UTF16 codeunits. /// Use with the `std::iter` module. +// NB: This could be implemented as a flatmap from Chars +// to char::Utf16CodeUnits, except that FlatMap is not +// Clone. #[deriving(Clone)] pub struct Utf16CodeUnits<'a> { chars: Chars<'a>, - extra: u16 + char_units: Option } impl<'a> Iterator for Utf16CodeUnits<'a> { #[inline] fn next(&mut self) -> Option { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); + match self.char_units { + Some(ref mut char_units) => { + match char_units.next() { + Some(unit) => Some(unit), + None => { + let next_char = self.chars.next(); + match next_char { + Some(next_char) => { + *char_units = next_char.encode_utf16(); + char_units.next() + } + None => None + } + } + } + } + None => None } - - let mut buf = [0u16, ..2]; - self.chars.next().map(|ch| { - let n = ch.encode_utf16(buf[mut]).unwrap_or(0); - if n == 2 { self.extra = buf[1]; } - buf[0] - }) } #[inline] @@ -1266,7 +1275,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]); /// - /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); /// 
assert_eq!(v, vec!["abc", "def", "ghi"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect(); @@ -1287,7 +1296,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect(); /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]); /// - /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["abc", "def2ghi"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect(); @@ -1319,7 +1328,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect(); /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]); /// - /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_digit()).rev().collect(); + /// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).rev().collect(); /// assert_eq!(v, vec!["ghi", "def", "abc"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect(); @@ -1337,7 +1346,7 @@ pub trait StrSlice<'a> { /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect(); /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]); /// - /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_digit()).collect(); + /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |c: char| c.is_numeric()).collect(); /// assert_eq!(v, vec!["ghi", "abc1def"]); /// /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect(); @@ -1547,7 +1556,7 @@ pub trait StrSlice<'a> { /// assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_chars(x), "foo1bar") - /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar") + /// assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar") /// ``` fn trim_chars(&self, to_trim: C) -> &'a str; @@ -1563,7 +1572,7 @@ pub trait StrSlice<'a> { 
/// assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_left_chars(x), "foo1bar12") - /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123") + /// assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123") /// ``` fn trim_left_chars(&self, to_trim: C) -> &'a str; @@ -1579,7 +1588,7 @@ pub trait StrSlice<'a> { /// assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar") /// let x: &[_] = &['1', '2']; /// assert_eq!("12foo1bar12".trim_right_chars(x), "12foo1bar") - /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar") + /// assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar") /// ``` fn trim_right_chars(&self, to_trim: C) -> &'a str; @@ -2176,7 +2185,9 @@ impl<'a> StrSlice<'a> for &'a str { #[inline] fn utf16_units(&self) -> Utf16CodeUnits<'a> { - Utf16CodeUnits{ chars: self.chars(), extra: 0} + let mut chars = self.chars(); + let first_char_units = chars.next().map(|c| c.encode_utf16()); + Utf16CodeUnits{ chars: chars, char_units: first_char_units } } } diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 8807756d01b7e..bd84903a09214 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -105,12 +105,12 @@ fn test_is_control() { #[test] fn test_is_digit() { - assert!('2'.is_digit()); - assert!('7'.is_digit()); - assert!(!'c'.is_digit()); - assert!(!'i'.is_digit()); - assert!(!'z'.is_digit()); - assert!(!'Q'.is_digit()); + assert!('2'.is_numeric()); + assert!('7'.is_numeric()); + assert!(!'c'.is_numeric()); + assert!(!'i'.is_numeric()); + assert!(!'z'.is_numeric()); + assert!(!'Q'.is_numeric()); } #[test] @@ -172,9 +172,8 @@ fn test_escape_unicode() { #[test] fn test_encode_utf8() { fn check(input: char, expect: &[u8]) { - let mut buf = [0u8, ..4]; - let n = input.encode_utf8(buf.as_mut_slice()).unwrap_or(0); - assert_eq!(buf[..n], expect); + 
let buf: Vec = input.encode_utf8().collect(); + assert_eq!(buf[], expect); } check('x', [0x78]); @@ -186,9 +185,8 @@ fn test_encode_utf8() { #[test] fn test_encode_utf16() { fn check(input: char, expect: &[u16]) { - let mut buf = [0u16, ..2]; - let n = input.encode_utf16(buf.as_mut_slice()).unwrap_or(0); - assert_eq!(buf[..n], expect); + let buf: Vec = input.encode_utf16().collect(); + assert_eq!(buf[], expect); } check('x', [0x0078]); @@ -197,6 +195,14 @@ fn test_encode_utf16() { check('\U0001f4a9', [0xd83d, 0xdca9]); } +#[test] +fn test_len_utf16() { + assert!('x'.len_utf16() == 1); + assert!('\u00e9'.len_utf16() == 1); + assert!('\ua66e'.len_utf16() == 1); + assert!('\U0001f4a9'.len_utf16() == 2); +} + #[test] fn test_width() { assert_eq!('\x00'.width(false),Some(0)); diff --git a/src/libdebug/repr.rs b/src/libdebug/repr.rs index e27816c816539..430e36340fb8d 100644 --- a/src/libdebug/repr.rs +++ b/src/libdebug/repr.rs @@ -14,7 +14,6 @@ More runtime type reflection */ -use std::char; use std::intrinsics::{Disr, Opaque, TyDesc, TyVisitor, get_tydesc, visit_tydesc}; use std::io; use std::mem; @@ -229,9 +228,9 @@ impl<'a> ReprVisitor<'a> { } '\x20'...'\x7e' => self.writer.write([ch as u8]), _ => { - char::escape_unicode(ch, |c| { + for c in ch.escape_unicode() { let _ = self.writer.write([c as u8]); - }); + } Ok(()) } }); diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index a9f34e1195ce6..f8d4534a4d4c9 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -406,7 +406,7 @@ impl<'a> Parser<'a> { loop { match self.cur.clone().next() { Some((_, c)) => { - match char::to_digit(c, 10) { + match c.to_digit(10) { Some(i) => { cur = cur * 10 + i; found = true; diff --git a/src/libgraphviz/lib.rs b/src/libgraphviz/lib.rs index e21186a5fc879..c9c53459e5589 100644 --- a/src/libgraphviz/lib.rs +++ b/src/libgraphviz/lib.rs @@ -420,7 +420,7 @@ impl<'a> LabelText<'a> { // not escaping \\, since Graphviz escString needs to // interpret 
backslashes; see EscStr above. '\\' => f(c), - _ => c.escape_default(f) + _ => for c in c.escape_default() { f(c) } } } fn escape_str(s: &str) -> String { diff --git a/src/libhexfloat/lib.rs b/src/libhexfloat/lib.rs index 8335cc16d649f..4ffab6fba099d 100644 --- a/src/libhexfloat/lib.rs +++ b/src/libhexfloat/lib.rs @@ -93,7 +93,7 @@ fn hex_float_lit_err(s: &str) -> Option<(uint, String)> { } i+=1; if chars.peek() == Some(&'-') { chars.next(); i+= 1 } let mut e_len = 0i; - for _ in chars.take_while(|c| c.is_digit()) { chars.next(); i+=1; e_len += 1} + for _ in chars.take_while(|c| c.is_numeric()) { chars.next(); i+=1; e_len += 1} if e_len == 0 { return Some((i, "Expected exponent digits".to_string())); } diff --git a/src/librustc/lint/builtin.rs b/src/librustc/lint/builtin.rs index 988b128e31d5c..942d1ab96aa87 100644 --- a/src/librustc/lint/builtin.rs +++ b/src/librustc/lint/builtin.rs @@ -861,7 +861,7 @@ impl NonSnakeCase { let mut allow_underscore = true; ident.chars().all(|c| { allow_underscore = match c { - c if c.is_lowercase() || c.is_digit() => true, + c if c.is_lowercase() || c.is_numeric() => true, '_' if allow_underscore => false, _ => return false, }; diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index 7e9bb2844a7c7..7d782c66acaa6 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -2020,9 +2020,9 @@ fn lit_to_string(lit: &ast::Lit) -> String { ast::LitBinary(ref data) => format!("{:?}", data.as_slice()), ast::LitByte(b) => { let mut res = String::from_str("b'"); - (b as char).escape_default(|c| { + for c in (b as char).escape_default() { res.push(c); - }); + } res.push('\''); res }, diff --git a/src/libserialize/json.rs b/src/libserialize/json.rs index eda38e96cbb1a..b288f7cb96924 100644 --- a/src/libserialize/json.rs +++ b/src/libserialize/json.rs @@ -354,9 +354,11 @@ fn escape_str(writer: &mut io::Writer, v: &str) -> Result<(), io::IoError> { } fn escape_char(writer: &mut io::Writer, v: char) -> 
Result<(), io::IoError> { - let mut buf = [0, .. 4]; - v.encode_utf8(buf); - escape_bytes(writer, buf) + for byte in v.encode_utf8() { + try!(escape_bytes(writer, [byte])); + } + + Ok(()) } fn spaces(wr: &mut io::Writer, mut n: uint) -> Result<(), io::IoError> { diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 71d38ac66597f..9dd6bf709349e 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -592,10 +592,10 @@ mod tests { assert_eq!('`'.to_ascii().to_uppercase().to_char(), '`'); assert_eq!('{'.to_ascii().to_uppercase().to_char(), '{'); - assert!('0'.to_ascii().is_digit()); - assert!('9'.to_ascii().is_digit()); - assert!(!'/'.to_ascii().is_digit()); - assert!(!':'.to_ascii().is_digit()); + assert!('0'.to_ascii().is_numeric()); + assert!('9'.to_ascii().is_numeric()); + assert!(!'/'.to_ascii().is_numeric()); + assert!(!':'.to_ascii().is_numeric()); assert!((0x1fu8).to_ascii().is_control()); assert!(!' '.to_ascii().is_control()); diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 8592d48974a25..7fcbb6dd960ce 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -1110,9 +1110,11 @@ pub trait Writer { /// Write a single char, encoded as UTF-8. #[inline] fn write_char(&mut self, c: char) -> IoResult<()> { - let mut buf = [0u8, ..4]; - let n = c.encode_utf8(buf[mut]).unwrap_or(0); - self.write(buf[..n]) + for byte in c.encode_utf8() { + try!(self.write_u8(byte)); + } + + Ok(()) } /// Write the result of passing n through `int::to_str_bytes`. 
diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 48ee7664c16ac..04f71fb7759e2 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -13,6 +13,7 @@ #![allow(missing_doc)] use char; +use char::Char; use clone::Clone; use collections::{Collection, MutableSeq}; use num::{NumCast, Zero, One, cast, Int}; @@ -618,7 +619,7 @@ pub fn from_str_bytes_common+ while i < len { let c = buf[i] as char; - match char::to_digit(c, radix) { + match c.to_digit(radix) { Some(digit) => { // shift accum one digit left accum = accum * radix_gen.clone(); @@ -673,7 +674,7 @@ pub fn from_str_bytes_common+ while i < len { let c = buf[i] as char; - match char::to_digit(c, radix) { + match c.to_digit(radix) { Some(digit) => { // Decrease power one order of magnitude power = power / radix_gen; diff --git a/src/libstd/rt/backtrace.rs b/src/libstd/rt/backtrace.rs index e05e533be56c5..250a3e7efafda 100644 --- a/src/libstd/rt/backtrace.rs +++ b/src/libstd/rt/backtrace.rs @@ -73,7 +73,7 @@ fn demangle(writer: &mut Writer, s: &str) -> IoResult<()> { while valid { let mut i = 0; for c in chars { - if c.is_digit() { + if c.is_numeric() { i = i * 10 + c as uint - '0' as uint; } else { break @@ -103,7 +103,7 @@ fn demangle(writer: &mut Writer, s: &str) -> IoResult<()> { first = false; } let mut rest = s; - while rest.char_at(0).is_digit() { + while rest.char_at(0).is_numeric() { rest = rest.slice_from(1); } let i: uint = from_str(s.slice_to(s.len() - rest.len())).unwrap(); diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index d32828192e996..898cbe6f72b81 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -2663,13 +2663,17 @@ impl<'a> State<'a> { ast::LitStr(ref st, style) => self.print_string(st.get(), style), ast::LitByte(byte) => { let mut res = String::from_str("b'"); - (byte as char).escape_default(|c| res.push_char(c)); + for c in (byte as char).escape_default() { + res.push_char(c); + } 
res.push_char('\''); word(&mut self.s, res.as_slice()) } ast::LitChar(ch) => { let mut res = String::from_str("'"); - ch.escape_default(|c| res.push_char(c)); + for c in ch.escape_default() { + res.push_char(c); + } res.push_char('\''); word(&mut self.s, res.as_slice()) } diff --git a/src/libterm/terminfo/parm.rs b/src/libterm/terminfo/parm.rs index a1bce6e8e8b04..bba29420b6334 100644 --- a/src/libterm/terminfo/parm.rs +++ b/src/libterm/terminfo/parm.rs @@ -10,7 +10,6 @@ //! Parameterized string expansion -use std::char; use std::mem::replace; #[deriving(PartialEq)] @@ -293,7 +292,7 @@ pub fn expand(cap: &[u8], params: &[Param], vars: &mut Variables) }, PushParam => { // params are 1-indexed - stack.push(mparams[match char::to_digit(cur, 10) { + stack.push(mparams[match cur.to_digit(10) { Some(d) => d - 1, None => return Err("bad param number".to_string()) }].clone()); diff --git a/src/libunicode/u_char.rs b/src/libunicode/u_char.rs index f725cdba64ef5..1dec001c3d026 100644 --- a/src/libunicode/u_char.rs +++ b/src/libunicode/u_char.rs @@ -217,7 +217,7 @@ pub trait UnicodeChar { fn is_control(&self) -> bool; /// Indicates whether the character is numeric (Nd, Nl, or No). - fn is_digit(&self) -> bool; + fn is_numeric(&self) -> bool; /// Converts a character to its lowercase equivalent. /// @@ -279,7 +279,7 @@ impl UnicodeChar for char { fn is_control(&self) -> bool { is_control(*self) } - fn is_digit(&self) -> bool { is_digit(*self) } + fn is_numeric(&self) -> bool { is_digit(*self) } fn to_lowercase(&self) -> char { to_lowercase(*self) }