@@ -6,6 +6,44 @@ use crate::iter::FusedIterator;
6
6
use super :: from_utf8_unchecked;
7
7
use super :: validations:: utf8_char_width;
8
8
9
+ impl [ u8 ] {
10
+ /// Creates an iterator over the contiguous valid UTF-8 ranges of this
11
+ /// slice, and the non-UTF-8 fragments in between, yielding [`Utf8Chunk`] items.
12
+ ///
13
+ /// # Examples
14
+ ///
15
+ /// The `cstr_literal` function below formats arbitrary but mostly-UTF-8 bytes into
16
+ /// Rust source code in the form of a C-string literal (`c"..."`).
17
+ ///
18
+ /// ```
19
+ /// use std::fmt::Write as _;
20
+ ///
21
+ /// pub fn cstr_literal(bytes: &[u8]) -> String {
22
+ /// let mut repr = String::new();
23
+ /// repr.push_str("c\"");
24
+ /// for chunk in bytes.utf8_chunks() {
25
+ /// for ch in chunk.valid().chars() {
26
+ /// // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for non-printable characters.
27
+ /// write!(repr, "{}", ch.escape_debug()).unwrap();
28
+ /// }
29
+ /// for byte in chunk.invalid() {
30
+ /// write!(repr, "\\x{:02X}", byte).unwrap();
31
+ /// }
32
+ /// }
33
+ /// repr.push('"');
34
+ /// repr
35
+ /// }
36
+ ///
37
+ /// fn main() {
38
+ /// println!("{}", cstr_literal(b"\xferris the \xf0\x9f\xa6\x80\x07"));
39
+ /// }
40
+ /// ```
41
+ #[ stable( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
42
+ pub fn utf8_chunks ( & self ) -> Utf8Chunks < ' _ > {
43
+ Utf8Chunks { source : self }
44
+ }
45
+ }
46
+
9
47
/// An item returned by the [`Utf8Chunks`] iterator.
10
48
///
11
49
/// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
@@ -14,23 +52,19 @@ use super::validations::utf8_char_width;
14
52
/// # Examples
15
53
///
16
54
/// ```
17
- /// #![feature(utf8_chunks)]
18
- ///
19
- /// use std::str::Utf8Chunks;
20
- ///
21
55
/// // An invalid UTF-8 string
22
56
/// let bytes = b"foo\xF1\x80bar";
23
57
///
24
58
/// // Decode the first `Utf8Chunk`
25
- /// let chunk = Utf8Chunks::new( bytes).next().unwrap();
59
+ /// let chunk = bytes.utf8_chunks( ).next().unwrap();
26
60
///
27
61
/// // The first three characters are valid UTF-8
28
62
/// assert_eq!("foo", chunk.valid());
29
63
///
30
64
/// // The fourth character is broken
31
65
/// assert_eq!(b"\xF1\x80", chunk.invalid());
32
66
/// ```
33
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
67
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
34
68
#[ derive( Clone , Debug , PartialEq , Eq ) ]
35
69
pub struct Utf8Chunk < ' a > {
36
70
valid : & ' a str ,
@@ -43,7 +77,7 @@ impl<'a> Utf8Chunk<'a> {
43
77
/// This substring can be empty at the start of the string or between
44
78
/// broken UTF-8 characters.
45
79
#[ must_use]
46
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
80
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
47
81
pub fn valid ( & self ) -> & ' a str {
48
82
self . valid
49
83
}
@@ -63,7 +97,7 @@ impl<'a> Utf8Chunk<'a> {
63
97
/// [`valid`]: Self::valid
64
98
/// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
65
99
#[ must_use]
66
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
100
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
67
101
pub fn invalid ( & self ) -> & ' a [ u8 ] {
68
102
self . invalid
69
103
}
@@ -78,7 +112,7 @@ impl fmt::Debug for Debug<'_> {
78
112
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
79
113
f. write_char ( '"' ) ?;
80
114
81
- for chunk in Utf8Chunks :: new ( self . 0 ) {
115
+ for chunk in self . 0 . utf8_chunks ( ) {
82
116
// Valid part.
83
117
// Here we partially parse UTF-8 again which is suboptimal.
84
118
{
@@ -123,12 +157,8 @@ impl fmt::Debug for Debug<'_> {
123
157
/// [`String::from_utf8_lossy`] without allocating heap memory:
124
158
///
125
159
/// ```
126
- /// #![feature(utf8_chunks)]
127
- ///
128
- /// use std::str::Utf8Chunks;
129
- ///
130
160
/// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
131
- /// for chunk in Utf8Chunks::new( input) {
161
+ /// for chunk in input.utf8_chunks( ) {
132
162
/// push(chunk.valid());
133
163
///
134
164
/// if !chunk.invalid().is_empty() {
@@ -140,27 +170,21 @@ impl fmt::Debug for Debug<'_> {
140
170
///
141
171
/// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
142
172
#[ must_use = "iterators are lazy and do nothing unless consumed" ]
143
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
173
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
144
174
#[ derive( Clone ) ]
145
175
pub struct Utf8Chunks < ' a > {
146
176
source : & ' a [ u8 ] ,
147
177
}
148
178
149
179
impl < ' a > Utf8Chunks < ' a > {
150
- /// Creates a new iterator to decode the bytes.
151
- #[ unstable( feature = "utf8_chunks" , issue = "99543" ) ]
152
- pub fn new ( bytes : & ' a [ u8 ] ) -> Self {
153
- Self { source : bytes }
154
- }
155
-
156
180
/// Wraps the underlying byte slice in the [`Debug`] helper, whose `fmt::Debug`
/// impl formats the bytes chunk by chunk.
///
/// Used by the `fmt::Debug` impl of `Utf8Chunks` to render its `source` field.
#[ doc( hidden) ]
157
181
#[ unstable( feature = "str_internals" , issue = "none" ) ]
158
182
pub fn debug ( & self ) -> Debug < ' _ > {
159
183
Debug ( self . source )
160
184
}
161
185
}
162
186
163
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
187
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
164
188
impl < ' a > Iterator for Utf8Chunks < ' a > {
165
189
type Item = Utf8Chunk < ' a > ;
166
190
@@ -259,10 +283,10 @@ impl<'a> Iterator for Utf8Chunks<'a> {
259
283
}
260
284
}
261
285
262
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
286
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
263
287
impl FusedIterator for Utf8Chunks < ' _ > { }
264
288
265
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
289
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
266
290
impl fmt:: Debug for Utf8Chunks < ' _ > {
267
291
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
268
292
f. debug_struct ( "Utf8Chunks" ) . field ( "source" , & self . debug ( ) ) . finish ( )
0 commit comments