From c24ef078b0ab251d83c97ec198fd5adfb320d177 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 10 Mar 2015 16:29:02 -0700 Subject: [PATCH] std: Deprecate the `str::CharRange` type This struct and the associated functions `char_range_at` and `char_range_at_reverse` have been unstable for some time now, and it looks like with a combination of `char_at` plus `char_at_reverse` plus `len_utf8` that the structure isn't necessary at this time. The structure and perhaps more fanciful unicode processing support could be added at a later date, but for now the types are being deprecated and slated for removal. Any code currently using `CharRange` can instead use the `char_at` and `char_at_reverse` methods plus the `len_utf8` method on `char` as a replacement. Closes #9387 [breaking-change] --- src/compiletest/runtest.rs | 18 +++++++++--------- src/libcollections/str.rs | 8 ++++++++ src/libcollections/string.rs | 9 +++++---- src/libcore/str/mod.rs | 15 +++++++++++++-- src/libgetopts/lib.rs | 12 ++++++------ src/libsyntax/parse/lexer/comments.rs | 7 +++---- src/libsyntax/parse/lexer/mod.rs | 16 +++++++++------- src/libunicode/u_str.rs | 2 +- 8 files changed, 54 insertions(+), 33 deletions(-) diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index 04714b50fc027..67ad06c6ec2b4 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -1049,22 +1049,22 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool { if *idx >= haystack.len() { return false; } - let range = haystack.char_range_at(*idx); - if range.ch != needle { + let ch = haystack.char_at(*idx); + if ch != needle { return false; } - *idx = range.next; + *idx += ch.len_utf8(); return true; } fn scan_integer(haystack: &str, idx: &mut uint) -> bool { let mut i = *idx; while i < haystack.len() { - let range = haystack.char_range_at(i); - if range.ch < '0' || '9' < range.ch { + let ch = haystack.char_at(i); + if ch < '0' || '9' < ch { break; } - i = range.next; + i += ch.len_utf8(); } if i == *idx { return false; @@ -1080,9 +1080,9 @@ fn scan_string(haystack: &str, needle: &str, idx: &mut uint) -> bool { if haystack_i >= haystack.len() { return false; } - let range = haystack.char_range_at(haystack_i); - haystack_i = range.next; - if !scan_char(needle, range.ch, &mut needle_i) { + let ch = haystack.char_at(haystack_i); + haystack_i += ch.len_utf8(); + if !scan_char(needle, ch, &mut needle_i) { return false; } } diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 49317a7f0cecd..87a420f02aeec 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -1061,6 +1061,10 @@ pub trait StrExt: Index { /// ``` #[unstable(feature = "collections", reason = "naming is uncertain with container conventions")] + #[deprecated(since = "1.0.0", + reason = "replaced with other unicode functions such as \ + char_indices or len_utf8")] + #[allow(deprecated)] fn char_range_at(&self, start: usize) -> CharRange { core_str::StrExt::char_range_at(&self[..], start) } @@ -1109,6 +1113,10 @@ pub trait StrExt: Index { /// ``` #[unstable(feature = "collections", reason = "naming is uncertain with container conventions")] + #[deprecated(since = "1.0.0", + reason = "replaced with other unicode functions such as \ + char_indices or len_utf8")] + #[allow(deprecated)] fn char_range_at_reverse(&self, start: usize) -> CharRange { core_str::StrExt::char_range_at_reverse(&self[..], start) } diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 83c63e47e506b..93eba89e0de5a 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -29,7 +29,7 @@ use unicode::str as unicode_str; use unicode::str::Utf16Item; use borrow::{Cow, IntoCow}; -use str::{self, CharRange, FromStr, Utf8Error}; +use str::{self, FromStr, Utf8Error}; use vec::{DerefVec, Vec, as_vec}; /// A growable string stored as a UTF-8 encoded buffer. @@ -532,9 +532,9 @@ impl String { return None } - let CharRange {ch, next} = self.char_range_at_reverse(len); + let ch = self.char_at_reverse(len); unsafe { - self.vec.set_len(next); + self.vec.set_len(len - ch.len_utf8()); } Some(ch) } @@ -566,7 +566,8 @@ impl String { let len = self.len(); assert!(idx <= len); - let CharRange { ch, next } = self.char_range_at(idx); + let ch = self.char_at(idx); + let next = idx + ch.len_utf8(); unsafe { ptr::copy(self.vec.as_mut_ptr().offset(idx as isize), self.vec.as_ptr().offset(next as isize), diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 1d4b81512dda8..fca4b6a1b84f5 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -18,6 +18,7 @@ use self::OldSearcher::{TwoWay, TwoWayLong}; +use char::CharExt; use clone::Clone; use cmp::{self, Eq}; use default::Default; @@ -1108,9 +1109,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [ /// Struct that contains a `char` and the index of the first byte of /// the next `char` in a string. This can be used as a data structure /// for iterating over the UTF-8 bytes of a string. +#[allow(deprecated)] #[derive(Copy)] #[unstable(feature = "core", reason = "naming is uncertain with container conventions")] +#[deprecated(since = "1.0.0", + reason = "replaced with other unicode functions such as \ + char_indices or len_utf8")] pub struct CharRange { /// Current `char` pub ch: char, @@ -1354,7 +1359,9 @@ pub trait StrExt { fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str where P::Searcher: ReverseSearcher<'a>; fn is_char_boundary(&self, index: usize) -> bool; + #[allow(deprecated)] fn char_range_at(&self, start: usize) -> CharRange; + #[allow(deprecated)] fn char_range_at_reverse(&self, start: usize) -> CharRange; fn char_at(&self, i: usize) -> char; fn char_at_reverse(&self, i: usize) -> char; @@ -1573,12 +1580,14 @@ impl StrExt for str { } #[inline] + #[allow(deprecated)] fn char_range_at(&self, i: usize) -> CharRange { let (c, n) = char_range_at_raw(self.as_bytes(), i); CharRange { ch: unsafe { mem::transmute(c) }, next: n } } #[inline] + #[allow(deprecated)] fn char_range_at_reverse(&self, start: usize) -> CharRange { let mut prev = start; @@ -1610,11 +1619,13 @@ impl StrExt for str { } #[inline] + #[allow(deprecated)] fn char_at(&self, i: usize) -> char { self.char_range_at(i).ch } #[inline] + #[allow(deprecated)] fn char_at_reverse(&self, i: usize) -> char { self.char_range_at_reverse(i).ch } @@ -1643,8 +1654,8 @@ impl StrExt for str { if self.is_empty() { None } else { - let CharRange {ch, next} = self.char_range_at(0); - let next_s = unsafe { self.slice_unchecked(next, self.len()) }; + let ch = self.char_at(0); + let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) }; Some((ch, next_s)) } } diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs index 38abf3881bdf6..21b0e9e56ef3d 100644 --- a/src/libgetopts/lib.rs +++ b/src/libgetopts/lib.rs @@ -95,7 +95,6 @@ #![feature(collections)] #![feature(int_uint)] #![feature(staged_api)] -#![feature(core)] #![feature(str_words)] #![cfg_attr(test, feature(rustc_private))] @@ -620,8 +619,8 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result { let mut j = 1; names = Vec::new(); while j < curlen { - let range = cur.char_range_at(j); - let opt = Short(range.ch); + let ch = cur.char_at(j); + let opt = Short(ch); /* In a series of potential options (eg. -aheJ), if we see one which takes an argument, we assume all @@ -642,12 +641,13 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result { No => false }; - if arg_follows && range.next < curlen { - i_arg = Some((&cur[range.next..curlen]).to_string()); + let next = j + ch.len_utf8(); + if arg_follows && next < curlen { + i_arg = Some((&cur[next..curlen]).to_string()); break; } - j = range.next; + j = next; } } let mut name_pos = 0; diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index fb9e0480cebc8..277f5365db3ec 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -20,7 +20,6 @@ use parse::lexer; use print::pprust; use std::io::Read; -use std::str; use std::usize; #[derive(Clone, Copy, PartialEq)] @@ -210,11 +209,11 @@ fn all_whitespace(s: &str, col: CharPos) -> Option { let mut col = col.to_usize(); let mut cursor: usize = 0; while col > 0 && cursor < len { - let r: str::CharRange = s.char_range_at(cursor); - if !r.ch.is_whitespace() { + let ch = s.char_at(cursor); + if !ch.is_whitespace() { return None; } - cursor = r.next; + cursor += ch.len_utf8(); col -= 1; } return Some(cursor); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index f5781e0587d24..d8ff62b6c4edc 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -22,7 +22,6 @@ use std::fmt; use std::mem::replace; use std::num; use std::rc::Rc; -use std::str; pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag}; @@ -291,7 +290,8 @@ impl<'a> StringReader<'a> { s: &'b str, errmsg: &'b str) -> Cow<'b, str> { let mut i = 0; while i < s.len() { - let str::CharRange { ch, next } = s.char_range_at(i); + let ch = s.char_at(i); + let next = i + ch.len_utf8(); if ch == '\r' { if next < s.len() && s.char_at(next) == '\n' { return translate_crlf_(self, start, s, errmsg, i).into_cow(); @@ -309,7 +309,8 @@ impl<'a> StringReader<'a> { let mut buf = String::with_capacity(s.len()); let mut j = 0; while i < s.len() { - let str::CharRange { ch, next } = s.char_range_at(i); + let ch = s.char_at(i); + let next = i + ch.len_utf8(); if ch == '\r' { if j < i { buf.push_str(&s[j..i]); } j = next; @@ -335,10 +336,11 @@ impl<'a> StringReader<'a> { if current_byte_offset < self.source_text.len() { assert!(self.curr.is_some()); let last_char = self.curr.unwrap(); - let next = self.source_text.char_range_at(current_byte_offset); - let byte_offset_diff = next.next - current_byte_offset; + let ch = self.source_text.char_at(current_byte_offset); + let next = current_byte_offset + ch.len_utf8(); + let byte_offset_diff = next - current_byte_offset; self.pos = self.pos + Pos::from_usize(byte_offset_diff); - self.curr = Some(next.ch); + self.curr = Some(ch); self.col = self.col + CharPos(1); if last_char == '\n' { self.filemap.next_line(self.last_pos); @@ -370,7 +372,7 @@ impl<'a> StringReader<'a> { let offset = self.byte_offset(self.pos).to_usize(); let s = &self.source_text[..]; if offset >= s.len() { return None } - let str::CharRange { next, .. } = s.char_range_at(offset); + let next = offset + s.char_at(offset).len_utf8(); if next < s.len() { Some(s.char_at(next)) } else { diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 57439addeaa1a..4b9948cb56276 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -243,7 +243,7 @@ impl<'a> Iterator for Graphemes<'a> { } self.cat = if take_curr { - idx = self.string.char_range_at(idx).next; + idx = idx + UCharExt::len_utf8(self.string.char_at(idx)); None } else { Some(cat)