@@ -6,6 +6,46 @@ use crate::iter::FusedIterator;
6
6
use super :: from_utf8_unchecked;
7
7
use super :: validations:: utf8_char_width;
8
8
9
+ impl [ u8 ] {
10
+ /// Creates an iterator over the contiguous valid UTF-8 ranges of this
11
+ /// slice, and the non-UTF-8 fragments in between.
12
+ ///
/// Each item is a [`Utf8Chunk`]: a valid `&str` part (`valid()`) paired with
/// the undecodable bytes that come after it (`invalid()`). The valid part
/// can be empty.
///
13
+ /// # Examples
14
+ ///
15
+ /// This function formats arbitrary but mostly-UTF-8 bytes into Rust source
16
+ /// code in the form of a C-string literal (`c"..."`).
17
+ ///
18
+ /// ```
19
+ /// use std::fmt::Write as _;
20
+ ///
21
+ /// pub fn cstr_literal(bytes: &[u8]) -> String {
22
+ ///     let mut repr = String::new();
23
+ ///     repr.push_str("c\"");
24
+ ///     for chunk in bytes.utf8_chunks() {
25
+ ///         for ch in chunk.valid().chars() {
26
+ ///             // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for non-printable characters.
27
+ ///             write!(repr, "{}", ch.escape_debug()).unwrap();
28
+ ///         }
29
+ ///         for byte in chunk.invalid() {
30
+ ///             write!(repr, "\\x{:02X}", byte).unwrap();
31
+ ///         }
32
+ ///     }
33
+ ///     repr.push('"');
34
+ ///     repr
35
+ /// }
36
+ ///
37
+ /// fn main() {
38
+ ///     let lit = cstr_literal(b"\xferris the \xf0\x9f\xa6\x80\x07");
39
+ ///     let expected = stringify!(c"\xFErris the 🦀\u{7}");
40
+ ///     assert_eq!(lit, expected);
41
+ /// }
42
+ /// ```
43
+ #[ stable( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
44
+ pub fn utf8_chunks ( & self ) -> Utf8Chunks < ' _ > {
45
// Construction does no decoding here; it only stores the borrowed slice.
+ Utf8Chunks { source : self }
46
+ }
47
+ }
48
+
9
49
/// An item returned by the [`Utf8Chunks`] iterator.
10
50
///
11
51
/// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
@@ -14,23 +54,19 @@ use super::validations::utf8_char_width;
14
54
/// # Examples
15
55
///
16
56
/// ```
17
- /// #![feature(utf8_chunks)]
18
- ///
19
- /// use std::str::Utf8Chunks;
20
- ///
21
57
/// // An invalid UTF-8 string
22
58
/// let bytes = b"foo\xF1\x80bar";
23
59
///
24
60
/// // Decode the first `Utf8Chunk`
25
- /// let chunk = Utf8Chunks::new( bytes).next().unwrap();
61
+ /// let chunk = bytes.utf8_chunks( ).next().unwrap();
26
62
///
27
63
/// // The first three characters are valid UTF-8
28
64
/// assert_eq!("foo", chunk.valid());
29
65
///
30
66
/// // The fourth character is broken
31
67
/// assert_eq!(b"\xF1\x80", chunk.invalid());
32
68
/// ```
33
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
69
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
34
70
#[ derive( Clone , Debug , PartialEq , Eq ) ]
35
71
pub struct Utf8Chunk < ' a > {
36
72
valid : & ' a str ,
@@ -43,7 +79,7 @@ impl<'a> Utf8Chunk<'a> {
43
79
/// This substring can be empty at the start of the string or between
44
80
/// broken UTF-8 characters.
45
81
#[ must_use]
46
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
82
// `since` must be the bare version placeholder, with no padding inside the quotes.
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
47
83
pub fn valid ( & self ) -> & ' a str {
48
84
// Returns the stored `valid` field; the result borrows for `'a`, not for `&self`.
self . valid
49
85
}
@@ -63,7 +99,7 @@ impl<'a> Utf8Chunk<'a> {
63
99
/// [`valid`]: Self::valid
64
100
/// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
65
101
#[ must_use]
66
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
102
// `since` must be the bare version placeholder, with no padding inside the quotes.
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
67
103
pub fn invalid ( & self ) -> & ' a [ u8 ] {
68
104
// Returns the stored `invalid` field; the result borrows for `'a`, not for `&self`.
self . invalid
69
105
}
@@ -78,7 +114,7 @@ impl fmt::Debug for Debug<'_> {
78
114
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
79
115
f. write_char ( '"' ) ?;
80
116
81
- for chunk in Utf8Chunks :: new ( self . 0 ) {
117
+ for chunk in self . 0 . utf8_chunks ( ) {
82
118
// Valid part.
83
119
// Here we partially parse UTF-8 again which is suboptimal.
84
120
{
@@ -123,12 +159,8 @@ impl fmt::Debug for Debug<'_> {
123
159
/// [`String::from_utf8_lossy`] without allocating heap memory:
124
160
///
125
161
/// ```
126
- /// #![feature(utf8_chunks)]
127
- ///
128
- /// use std::str::Utf8Chunks;
129
- ///
130
162
/// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
131
- /// for chunk in Utf8Chunks::new( input) {
163
+ /// for chunk in input.utf8_chunks( ) {
132
164
/// push(chunk.valid());
133
165
///
134
166
/// if !chunk.invalid().is_empty() {
@@ -140,27 +172,21 @@ impl fmt::Debug for Debug<'_> {
140
172
///
141
173
/// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
142
174
#[ must_use = "iterators are lazy and do nothing unless consumed" ]
143
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
175
// `since` must be the bare version placeholder, with no padding inside the quotes.
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
144
176
#[ derive( Clone ) ]
145
177
pub struct Utf8Chunks < ' a > {
146
178
// Borrowed byte slice the iterator reads from.
// NOTE(review): presumably narrowed as chunks are yielded; `next` is not visible here, so confirm.
source : & ' a [ u8 ] ,
147
179
}
148
180
149
181
impl < ' a > Utf8Chunks < ' a > {
150
- /// Creates a new iterator to decode the bytes.
151
- #[ unstable( feature = "utf8_chunks" , issue = "99543" ) ]
152
- pub fn new ( bytes : & ' a [ u8 ] ) -> Self {
153
- Self { source : bytes }
154
- }
155
-
156
182
// Internal helper: hidden from rustdoc and gated behind the `str_internals`
// feature. Wraps the source bytes in the `Debug` adapter without consuming
// or advancing the iterator (it only borrows `self`).
#[ doc( hidden) ]
157
183
#[ unstable( feature = "str_internals" , issue = "none" ) ]
158
184
pub fn debug ( & self ) -> Debug < ' _ > {
159
185
Debug ( self . source )
160
186
}
161
187
}
162
188
163
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
189
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
164
190
impl < ' a > Iterator for Utf8Chunks < ' a > {
165
191
type Item = Utf8Chunk < ' a > ;
166
192
@@ -259,10 +285,10 @@ impl<'a> Iterator for Utf8Chunks<'a> {
259
285
}
260
286
}
261
287
262
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
288
// `since` must be the bare version placeholder, with no padding inside the quotes.
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION" ) ]
263
289
impl FusedIterator for Utf8Chunks < ' _ > { }
264
290
265
- #[ unstable ( feature = "utf8_chunks" , issue = "99543 " ) ]
291
+ #[ stable ( feature = "utf8_chunks" , since = "CURRENT_RUSTC_VERSION " ) ]
266
292
impl fmt:: Debug for Utf8Chunks < ' _ > {
267
293
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
268
294
f. debug_struct ( "Utf8Chunks" ) . field ( "source" , & self . debug ( ) ) . finish ( )
0 commit comments