Skip to content

Commit 3b6ef52

Browse files
committed
Auto merge of #123909 - dtolnay:utf8chunks, r=<try>
Stabilize `Utf8Chunks`

Pending FCP in #99543. This PR includes the proposed modification in rust-lang/libs-team#190, as agreed in #99543 (comment).
2 parents 0230848 + 47ca0ce commit 3b6ef52

File tree

9 files changed: +55 -40 lines changed

library/alloc/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@
163163
#![feature(tuple_trait)]
164164
#![feature(unicode_internals)]
165165
#![feature(unsize)]
166-
#![feature(utf8_chunks)]
167166
#![feature(vec_pop_if)]
168167
// tidy-alphabetical-end
169168
//

library/alloc/src/str.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ pub use core::str::{RSplit, Split};
5353
pub use core::str::{RSplitN, SplitN};
5454
#[stable(feature = "rust1", since = "1.0.0")]
5555
pub use core::str::{RSplitTerminator, SplitTerminator};
56-
#[unstable(feature = "utf8_chunks", issue = "99543")]
56+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
5757
pub use core::str::{Utf8Chunk, Utf8Chunks};
5858

5959
/// Note: `str` in `Concat<str>` is not meaningful here.

library/alloc/src/string.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,6 @@ use core::ops::{self, Range, RangeBounds};
5858
use core::ptr;
5959
use core::slice;
6060
use core::str::pattern::Pattern;
61-
#[cfg(not(no_global_oom_handling))]
62-
use core::str::Utf8Chunks;
6361

6462
#[cfg(not(no_global_oom_handling))]
6563
use crate::borrow::{Cow, ToOwned};
@@ -633,7 +631,7 @@ impl String {
633631
#[cfg(not(no_global_oom_handling))]
634632
#[stable(feature = "rust1", since = "1.0.0")]
635633
pub fn from_utf8_lossy(v: &[u8]) -> Cow<'_, str> {
636-
let mut iter = Utf8Chunks::new(v);
634+
let mut iter = v.utf8_chunks();
637635

638636
let first_valid = if let Some(chunk) = iter.next() {
639637
let valid = chunk.valid();

library/core/src/str/lossy.rs

+48-24
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,44 @@ use crate::iter::FusedIterator;
66
use super::from_utf8_unchecked;
77
use super::validations::utf8_char_width;
88

9+
impl [u8] {
10+
/// Creates an iterator over the contiguous valid UTF-8 ranges of this
11+
/// slice, and the non-UTF-8 fragments in between.
12+
///
13+
/// # Examples
14+
///
15+
/// This function formats arbitrary but mostly-UTF-8 bytes into Rust source
16+
/// code in the form of a C-string literal (`c"..."`).
17+
///
18+
/// ```
19+
/// use std::fmt::Write as _;
20+
///
21+
/// pub fn cstr_literal(bytes: &[u8]) -> String {
22+
/// let mut repr = String::new();
23+
/// repr.push_str("c\"");
24+
/// for chunk in bytes.utf8_chunks() {
25+
/// for ch in chunk.valid().chars() {
26+
/// // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for non-printable characters.
27+
/// write!(repr, "{}", ch.escape_debug()).unwrap();
28+
/// }
29+
/// for byte in chunk.invalid() {
30+
/// write!(repr, "\\x{:02X}", byte).unwrap();
31+
/// }
32+
/// }
33+
/// repr.push('"');
34+
/// repr
35+
/// }
36+
///
37+
/// fn main() {
38+
/// println!("{}", cstr_literal(b"\xferris the \xf0\x9f\xa6\x80\x07"));
39+
/// }
40+
/// ```
41+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
42+
pub fn utf8_chunks(&self) -> Utf8Chunks<'_> {
43+
Utf8Chunks { source: self }
44+
}
45+
}
46+
947
/// An item returned by the [`Utf8Chunks`] iterator.
1048
///
1149
/// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
@@ -14,23 +52,19 @@ use super::validations::utf8_char_width;
1452
/// # Examples
1553
///
1654
/// ```
17-
/// #![feature(utf8_chunks)]
18-
///
19-
/// use std::str::Utf8Chunks;
20-
///
2155
/// // An invalid UTF-8 string
2256
/// let bytes = b"foo\xF1\x80bar";
2357
///
2458
/// // Decode the first `Utf8Chunk`
25-
/// let chunk = Utf8Chunks::new(bytes).next().unwrap();
59+
/// let chunk = bytes.utf8_chunks().next().unwrap();
2660
///
2761
/// // The first three characters are valid UTF-8
2862
/// assert_eq!("foo", chunk.valid());
2963
///
3064
/// // The fourth character is broken
3165
/// assert_eq!(b"\xF1\x80", chunk.invalid());
3266
/// ```
33-
#[unstable(feature = "utf8_chunks", issue = "99543")]
67+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
3468
#[derive(Clone, Debug, PartialEq, Eq)]
3569
pub struct Utf8Chunk<'a> {
3670
valid: &'a str,
@@ -43,7 +77,7 @@ impl<'a> Utf8Chunk<'a> {
4377
/// This substring can be empty at the start of the string or between
4478
/// broken UTF-8 characters.
4579
#[must_use]
46-
#[unstable(feature = "utf8_chunks", issue = "99543")]
80+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
4781
pub fn valid(&self) -> &'a str {
4882
self.valid
4983
}
@@ -63,7 +97,7 @@ impl<'a> Utf8Chunk<'a> {
6397
/// [`valid`]: Self::valid
6498
/// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
6599
#[must_use]
66-
#[unstable(feature = "utf8_chunks", issue = "99543")]
100+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
67101
pub fn invalid(&self) -> &'a [u8] {
68102
self.invalid
69103
}
@@ -78,7 +112,7 @@ impl fmt::Debug for Debug<'_> {
78112
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
79113
f.write_char('"')?;
80114

81-
for chunk in Utf8Chunks::new(self.0) {
115+
for chunk in self.0.utf8_chunks() {
82116
// Valid part.
83117
// Here we partially parse UTF-8 again which is suboptimal.
84118
{
@@ -123,12 +157,8 @@ impl fmt::Debug for Debug<'_> {
123157
/// [`String::from_utf8_lossy`] without allocating heap memory:
124158
///
125159
/// ```
126-
/// #![feature(utf8_chunks)]
127-
///
128-
/// use std::str::Utf8Chunks;
129-
///
130160
/// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
131-
/// for chunk in Utf8Chunks::new(input) {
161+
/// for chunk in input.utf8_chunks() {
132162
/// push(chunk.valid());
133163
///
134164
/// if !chunk.invalid().is_empty() {
@@ -140,27 +170,21 @@ impl fmt::Debug for Debug<'_> {
140170
///
141171
/// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
142172
#[must_use = "iterators are lazy and do nothing unless consumed"]
143-
#[unstable(feature = "utf8_chunks", issue = "99543")]
173+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
144174
#[derive(Clone)]
145175
pub struct Utf8Chunks<'a> {
146176
source: &'a [u8],
147177
}
148178

149179
impl<'a> Utf8Chunks<'a> {
150-
/// Creates a new iterator to decode the bytes.
151-
#[unstable(feature = "utf8_chunks", issue = "99543")]
152-
pub fn new(bytes: &'a [u8]) -> Self {
153-
Self { source: bytes }
154-
}
155-
156180
#[doc(hidden)]
157181
#[unstable(feature = "str_internals", issue = "none")]
158182
pub fn debug(&self) -> Debug<'_> {
159183
Debug(self.source)
160184
}
161185
}
162186

163-
#[unstable(feature = "utf8_chunks", issue = "99543")]
187+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
164188
impl<'a> Iterator for Utf8Chunks<'a> {
165189
type Item = Utf8Chunk<'a>;
166190

@@ -259,10 +283,10 @@ impl<'a> Iterator for Utf8Chunks<'a> {
259283
}
260284
}
261285

262-
#[unstable(feature = "utf8_chunks", issue = "99543")]
286+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
263287
impl FusedIterator for Utf8Chunks<'_> {}
264288

265-
#[unstable(feature = "utf8_chunks", issue = "99543")]
289+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
266290
impl fmt::Debug for Utf8Chunks<'_> {
267291
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
268292
f.debug_struct("Utf8Chunks").field("source", &self.debug()).finish()

library/core/src/str/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::slice::{self, SliceIndex};
2424
pub mod pattern;
2525

2626
mod lossy;
27-
#[unstable(feature = "utf8_chunks", issue = "99543")]
27+
#[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
2828
pub use lossy::{Utf8Chunk, Utf8Chunks};
2929

3030
#[stable(feature = "rust1", since = "1.0.0")]

library/core/tests/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@
118118
#![feature(error_generic_member_access)]
119119
#![feature(error_in_core)]
120120
#![feature(trait_upcasting)]
121-
#![feature(utf8_chunks)]
122121
#![feature(is_ascii_octdigit)]
123122
#![feature(get_many_mut)]
124123
#![feature(iter_map_windows)]

library/core/tests/str_lossy.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
use core::str::Utf8Chunks;
2-
31
#[test]
42
fn chunks() {
53
macro_rules! assert_chunks {
64
( $string:expr, $(($valid:expr, $invalid:expr)),* $(,)? ) => {{
7-
let mut iter = Utf8Chunks::new($string);
5+
let mut iter = $string.utf8_chunks();
86
$(
97
let chunk = iter.next().expect("missing chunk");
108
assert_eq!($valid, chunk.valid());
@@ -79,7 +77,7 @@ fn debug() {
7977
"\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"",
8078
&format!(
8179
"{:?}",
82-
Utf8Chunks::new(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").debug(),
80+
b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa".utf8_chunks().debug(),
8381
),
8482
);
8583
}

library/std/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@
313313
#![feature(thread_local)]
314314
#![feature(try_blocks)]
315315
#![feature(type_alias_impl_trait)]
316-
#![feature(utf8_chunks)]
317316
// tidy-alphabetical-end
318317
//
319318
// Library features (core):

library/std/src/sys/os_str/bytes.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ use crate::str;
1111
use crate::sync::Arc;
1212
use crate::sys_common::{AsInner, IntoInner};
1313

14-
use core::str::Utf8Chunks;
15-
1614
#[cfg(test)]
1715
mod tests;
1816

@@ -29,7 +27,7 @@ pub struct Slice {
2927

3028
impl fmt::Debug for Slice {
3129
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32-
fmt::Debug::fmt(&Utf8Chunks::new(&self.inner).debug(), f)
30+
fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f)
3331
}
3432
}
3533

@@ -41,7 +39,7 @@ impl fmt::Display for Slice {
4139
return "".fmt(f);
4240
}
4341

44-
for chunk in Utf8Chunks::new(&self.inner) {
42+
for chunk in self.inner.utf8_chunks() {
4543
let valid = chunk.valid();
4644
// If we successfully decoded the whole chunk as a valid string then
4745
// we can return a direct formatting of the string which will also

0 commit comments

Comments
 (0)