Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b66efdd

Browse files
authored Sep 15, 2024
Rollup merge of rust-lang#129439 - okaneco:vec_string_lossy, r=Noratrieb
Implement feature `string_from_utf8_lossy_owned` for lossy conversion from `Vec<u8>` to `String` methods.

Accepted ACP: rust-lang/libs-team#116
Tracking issue: rust-lang#129436

Implement feature for lossily converting from `Vec<u8>` to `String`:
- Add `String::from_utf8_lossy_owned`
- Add `FromUtf8Error::into_utf8_lossy`

---

Related to rust-lang#64727, but unsure whether to mark it "fixed" by this PR. That issue partly asks for in-place replacement of the original allocation. We fulfill the other half of that request with these functions.

closes rust-lang#64727
2 parents af73374 + 1339c1b commit b66efdd

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed
 

‎alloc/src/string.rs

+74
Original file line number | Diff line number | Diff line change
@@ -660,6 +660,56 @@ impl String {
660660
Cow::Owned(res)
661661
}
662662

663+
/// Converts a [`Vec<u8>`] to a `String`, substituting invalid UTF-8
664+
/// sequences with replacement characters.
665+
///
666+
/// See [`from_utf8_lossy`] for more details.
667+
///
668+
/// [`from_utf8_lossy`]: String::from_utf8_lossy
669+
///
670+
/// Note that this function does not guarantee reuse of the original `Vec`
671+
/// allocation.
672+
///
673+
/// # Examples
674+
///
675+
/// Basic usage:
676+
///
677+
/// ```
678+
/// #![feature(string_from_utf8_lossy_owned)]
679+
/// // some bytes, in a vector
680+
/// let sparkle_heart = vec![240, 159, 146, 150];
681+
///
682+
/// let sparkle_heart = String::from_utf8_lossy_owned(sparkle_heart);
683+
///
684+
/// assert_eq!(String::from("💖"), sparkle_heart);
685+
/// ```
686+
///
687+
/// Incorrect bytes:
688+
///
689+
/// ```
690+
/// #![feature(string_from_utf8_lossy_owned)]
691+
/// // some invalid bytes
692+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
693+
/// let output = String::from_utf8_lossy_owned(input);
694+
///
695+
/// assert_eq!(String::from("Hello �World"), output);
696+
/// ```
697+
#[must_use]
698+
#[cfg(not(no_global_oom_handling))]
699+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
700+
pub fn from_utf8_lossy_owned(v: Vec<u8>) -> String {
701+
if let Cow::Owned(string) = String::from_utf8_lossy(&v) {
702+
string
703+
} else {
704+
// SAFETY: `String::from_utf8_lossy`'s contract ensures that if
705+
// it returns a `Cow::Borrowed`, it is a valid UTF-8 string.
706+
// Otherwise, it returns a new allocation of an owned `String`, with
707+
// replacement characters for invalid sequences, which is returned
708+
// above.
709+
unsafe { String::from_utf8_unchecked(v) }
710+
}
711+
}
712+
663713
/// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`]
664714
/// if `v` contains any invalid data.
665715
///
@@ -2010,6 +2060,30 @@ impl FromUtf8Error {
20102060
&self.bytes[..]
20112061
}
20122062

2063+
/// Converts the bytes into a `String` lossily, substituting invalid UTF-8
2064+
/// sequences with replacement characters.
2065+
///
2066+
/// See [`String::from_utf8_lossy`] for more details on replacement of
2067+
/// invalid sequences, and [`String::from_utf8_lossy_owned`] for the
2068+
/// `String` function which corresponds to this function.
2069+
///
2070+
/// # Examples
2071+
///
2072+
/// ```
2073+
/// #![feature(string_from_utf8_lossy_owned)]
2074+
/// // some invalid bytes
2075+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
2076+
/// let output = String::from_utf8(input).unwrap_or_else(|e| e.into_utf8_lossy());
2077+
///
2078+
/// assert_eq!(String::from("Hello �World"), output);
2079+
/// ```
2080+
#[must_use]
2081+
#[cfg(not(no_global_oom_handling))]
2082+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
2083+
pub fn into_utf8_lossy(self) -> String {
2084+
String::from_utf8_lossy_owned(self.bytes)
2085+
}
2086+
20132087
/// Returns the bytes that were attempted to convert to a `String`.
20142088
///
20152089
/// This method is carefully constructed to avoid allocation. It will

0 commit comments

Comments
 (0)
Please sign in to comment.