From c24ef078b0ab251d83c97ec198fd5adfb320d177 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Tue, 10 Mar 2015 16:29:02 -0700
Subject: [PATCH] std: Deprecate the `str::CharRange` type

This struct and the associated functions `char_range_at` and
`char_range_at_reverse` have been unstable for some time now, and it looks like
with a combination of `char_at` plus `char_at_reverse` plus `len_utf8` that the
structure isn't necessary at this time.

The structure and perhaps more fanciful unicode processing support could be
added at a later date, but for now the types are being deprecated and slated for
removal.

Any code currently using `CharRange` can instead use the `char_at` and
`char_at_reverse` methods plus the `len_utf8` method on `char` as a replacement.

Closes #9387
[breaking-change]
---
 src/compiletest/runtest.rs            | 18 +++++++++---------
 src/libcollections/str.rs             |  8 ++++++++
 src/libcollections/string.rs          |  9 +++++----
 src/libcore/str/mod.rs                | 15 +++++++++++++--
 src/libgetopts/lib.rs                 | 12 ++++++------
 src/libsyntax/parse/lexer/comments.rs |  7 +++----
 src/libsyntax/parse/lexer/mod.rs      | 16 +++++++++-------
 src/libunicode/u_str.rs               |  2 +-
 8 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs
index 04714b50fc027..67ad06c6ec2b4 100644
--- a/src/compiletest/runtest.rs
+++ b/src/compiletest/runtest.rs
@@ -1049,22 +1049,22 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
     if *idx >= haystack.len() {
         return false;
     }
-    let range = haystack.char_range_at(*idx);
-    if range.ch != needle {
+    let ch = haystack.char_at(*idx);
+    if ch != needle {
         return false;
     }
-    *idx = range.next;
+    *idx += ch.len_utf8();
     return true;
 }
 
 fn scan_integer(haystack: &str, idx: &mut uint) -> bool {
     let mut i = *idx;
     while i < haystack.len() {
-        let range = haystack.char_range_at(i);
-        if range.ch < '0' || '9' < range.ch {
+        let ch = haystack.char_at(i);
+        if ch < '0' || '9' < ch {
             break;
         }
-        i = range.next;
+        i += ch.len_utf8();
     }
     if i == *idx {
         return false;
@@ -1080,9 +1080,9 @@ fn scan_string(haystack: &str, needle: &str, idx: &mut uint) -> bool {
         if haystack_i >= haystack.len() {
             return false;
         }
-        let range = haystack.char_range_at(haystack_i);
-        haystack_i = range.next;
-        if !scan_char(needle, range.ch, &mut needle_i) {
+        let ch = haystack.char_at(haystack_i);
+        haystack_i += ch.len_utf8();
+        if !scan_char(needle, ch, &mut needle_i) {
             return false;
         }
     }
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index 49317a7f0cecd..87a420f02aeec 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -1061,6 +1061,10 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// ```
     #[unstable(feature = "collections",
                reason = "naming is uncertain with container conventions")]
+    #[deprecated(since = "1.0.0",
+                 reason = "replaced with other unicode functions such as \
+                           char_indices or len_utf8")]
+    #[allow(deprecated)]
     fn char_range_at(&self, start: usize) -> CharRange {
         core_str::StrExt::char_range_at(&self[..], start)
     }
@@ -1109,6 +1113,10 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// ```
     #[unstable(feature = "collections",
                reason = "naming is uncertain with container conventions")]
+    #[deprecated(since = "1.0.0",
+                 reason = "replaced with other unicode functions such as \
+                           char_indices or len_utf8")]
+    #[allow(deprecated)]
     fn char_range_at_reverse(&self, start: usize) -> CharRange {
         core_str::StrExt::char_range_at_reverse(&self[..], start)
     }
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index 83c63e47e506b..93eba89e0de5a 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -29,7 +29,7 @@ use unicode::str as unicode_str;
 use unicode::str::Utf16Item;
 
 use borrow::{Cow, IntoCow};
-use str::{self, CharRange, FromStr, Utf8Error};
+use str::{self, FromStr, Utf8Error};
 use vec::{DerefVec, Vec, as_vec};
 
 /// A growable string stored as a UTF-8 encoded buffer.
@@ -532,9 +532,9 @@ impl String {
             return None
         }
 
-        let CharRange {ch, next} = self.char_range_at_reverse(len);
+        let ch = self.char_at_reverse(len);
         unsafe {
-            self.vec.set_len(next);
+            self.vec.set_len(len - ch.len_utf8());
         }
         Some(ch)
     }
@@ -566,7 +566,8 @@ impl String {
         let len = self.len();
         assert!(idx <= len);
 
-        let CharRange { ch, next } = self.char_range_at(idx);
+        let ch = self.char_at(idx);
+        let next = idx + ch.len_utf8();
         unsafe {
             ptr::copy(self.vec.as_mut_ptr().offset(idx as isize),
                       self.vec.as_ptr().offset(next as isize),
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 1d4b81512dda8..fca4b6a1b84f5 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -18,6 +18,7 @@
 
 use self::OldSearcher::{TwoWay, TwoWayLong};
 
+use char::CharExt;
 use clone::Clone;
 use cmp::{self, Eq};
 use default::Default;
@@ -1108,9 +1109,13 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
 /// Struct that contains a `char` and the index of the first byte of
 /// the next `char` in a string.  This can be used as a data structure
 /// for iterating over the UTF-8 bytes of a string.
+#[allow(deprecated)]
 #[derive(Copy)]
 #[unstable(feature = "core",
            reason = "naming is uncertain with container conventions")]
+#[deprecated(since = "1.0.0",
+             reason = "replaced with other unicode functions such as \
+                       char_indices or len_utf8")]
 pub struct CharRange {
     /// Current `char`
     pub ch: char,
@@ -1354,7 +1359,9 @@ pub trait StrExt {
     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
         where P::Searcher: ReverseSearcher<'a>;
     fn is_char_boundary(&self, index: usize) -> bool;
+    #[allow(deprecated)]
     fn char_range_at(&self, start: usize) -> CharRange;
+    #[allow(deprecated)]
     fn char_range_at_reverse(&self, start: usize) -> CharRange;
     fn char_at(&self, i: usize) -> char;
     fn char_at_reverse(&self, i: usize) -> char;
@@ -1573,12 +1580,14 @@ impl StrExt for str {
     }
 
     #[inline]
+    #[allow(deprecated)]
     fn char_range_at(&self, i: usize) -> CharRange {
         let (c, n) = char_range_at_raw(self.as_bytes(), i);
         CharRange { ch: unsafe { mem::transmute(c) }, next: n }
     }
 
     #[inline]
+    #[allow(deprecated)]
     fn char_range_at_reverse(&self, start: usize) -> CharRange {
         let mut prev = start;
 
@@ -1610,11 +1619,13 @@ impl StrExt for str {
     }
 
     #[inline]
+    #[allow(deprecated)]
     fn char_at(&self, i: usize) -> char {
         self.char_range_at(i).ch
     }
 
     #[inline]
+    #[allow(deprecated)]
     fn char_at_reverse(&self, i: usize) -> char {
         self.char_range_at_reverse(i).ch
     }
@@ -1643,8 +1654,8 @@ impl StrExt for str {
         if self.is_empty() {
             None
         } else {
-            let CharRange {ch, next} = self.char_range_at(0);
-            let next_s = unsafe { self.slice_unchecked(next, self.len()) };
+            let ch = self.char_at(0);
+            let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
             Some((ch, next_s))
         }
     }
diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs
index 38abf3881bdf6..21b0e9e56ef3d 100644
--- a/src/libgetopts/lib.rs
+++ b/src/libgetopts/lib.rs
@@ -95,7 +95,6 @@
 #![feature(collections)]
 #![feature(int_uint)]
 #![feature(staged_api)]
-#![feature(core)]
 #![feature(str_words)]
 #![cfg_attr(test, feature(rustc_private))]
 
@@ -620,8 +619,8 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
                 let mut j = 1;
                 names = Vec::new();
                 while j < curlen {
-                    let range = cur.char_range_at(j);
-                    let opt = Short(range.ch);
+                    let ch = cur.char_at(j);
+                    let opt = Short(ch);
 
                     /* In a series of potential options (eg. -aheJ), if we
                        see one which takes an argument, we assume all
@@ -642,12 +641,13 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
                         No => false
                     };
 
-                    if arg_follows && range.next < curlen {
-                        i_arg = Some((&cur[range.next..curlen]).to_string());
+                    let next = j + ch.len_utf8();
+                    if arg_follows && next < curlen {
+                        i_arg = Some((&cur[next..curlen]).to_string());
                         break;
                     }
 
-                    j = range.next;
+                    j = next;
                 }
             }
             let mut name_pos = 0;
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index fb9e0480cebc8..277f5365db3ec 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -20,7 +20,6 @@ use parse::lexer;
 use print::pprust;
 
 use std::io::Read;
-use std::str;
 use std::usize;
 
 #[derive(Clone, Copy, PartialEq)]
@@ -210,11 +209,11 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
     let mut col = col.to_usize();
     let mut cursor: usize = 0;
     while col > 0 && cursor < len {
-        let r: str::CharRange = s.char_range_at(cursor);
-        if !r.ch.is_whitespace() {
+        let ch = s.char_at(cursor);
+        if !ch.is_whitespace() {
             return None;
         }
-        cursor = r.next;
+        cursor += ch.len_utf8();
         col -= 1;
     }
     return Some(cursor);
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f5781e0587d24..d8ff62b6c4edc 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -22,7 +22,6 @@ use std::fmt;
 use std::mem::replace;
 use std::num;
 use std::rc::Rc;
-use std::str;
 
 pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};
 
@@ -291,7 +290,8 @@ impl<'a> StringReader<'a> {
                           s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
         let mut i = 0;
         while i < s.len() {
-            let str::CharRange { ch, next } = s.char_range_at(i);
+            let ch = s.char_at(i);
+            let next = i + ch.len_utf8();
             if ch == '\r' {
                 if next < s.len() && s.char_at(next) == '\n' {
                     return translate_crlf_(self, start, s, errmsg, i).into_cow();
@@ -309,7 +309,8 @@ impl<'a> StringReader<'a> {
             let mut buf = String::with_capacity(s.len());
             let mut j = 0;
             while i < s.len() {
-                let str::CharRange { ch, next } = s.char_range_at(i);
+                let ch = s.char_at(i);
+                let next = i + ch.len_utf8();
                 if ch == '\r' {
                     if j < i { buf.push_str(&s[j..i]); }
                     j = next;
@@ -335,10 +336,11 @@ impl<'a> StringReader<'a> {
         if current_byte_offset < self.source_text.len() {
             assert!(self.curr.is_some());
             let last_char = self.curr.unwrap();
-            let next = self.source_text.char_range_at(current_byte_offset);
-            let byte_offset_diff = next.next - current_byte_offset;
+            let ch = self.source_text.char_at(current_byte_offset);
+            let next = current_byte_offset + ch.len_utf8();
+            let byte_offset_diff = next - current_byte_offset;
             self.pos = self.pos + Pos::from_usize(byte_offset_diff);
-            self.curr = Some(next.ch);
+            self.curr = Some(ch);
             self.col = self.col + CharPos(1);
             if last_char == '\n' {
                 self.filemap.next_line(self.last_pos);
@@ -370,7 +372,7 @@ impl<'a> StringReader<'a> {
         let offset = self.byte_offset(self.pos).to_usize();
         let s = &self.source_text[..];
         if offset >= s.len() { return None }
-        let str::CharRange { next, .. } = s.char_range_at(offset);
+        let next = offset + s.char_at(offset).len_utf8();
         if next < s.len() {
             Some(s.char_at(next))
         } else {
diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs
index 57439addeaa1a..4b9948cb56276 100644
--- a/src/libunicode/u_str.rs
+++ b/src/libunicode/u_str.rs
@@ -243,7 +243,7 @@ impl<'a> Iterator for Graphemes<'a> {
         }
 
         self.cat = if take_curr {
-            idx = self.string.char_range_at(idx).next;
+            idx = idx + UCharExt::len_utf8(self.string.char_at(idx));
             None
         } else {
             Some(cat)