Skip to content

Commit 1339c1b

Browse files
committed
Implement feature string_from_utf8_lossy_owned
Implement feature for lossily converting from `Vec<u8>` to `String`:
- Add `String::from_utf8_lossy_owned`
- Add `FromUtf8Error::into_utf8_lossy`
1 parent 8cd20cb commit 1339c1b

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed

alloc/src/string.rs

+74
Original file line number | Diff line number | Diff line change
@@ -662,6 +662,56 @@ impl String {
662662
Cow::Owned(res)
663663
}
664664

665+
/// Converts a [`Vec<u8>`] to a `String`, substituting invalid UTF-8
666+
/// sequences with replacement characters.
667+
///
668+
/// See [`from_utf8_lossy`] for more details.
669+
///
670+
/// [`from_utf8_lossy`]: String::from_utf8_lossy
671+
///
672+
/// Note that this function does not guarantee reuse of the original `Vec`
673+
/// allocation.
674+
///
675+
/// # Examples
676+
///
677+
/// Basic usage:
678+
///
679+
/// ```
680+
/// #![feature(string_from_utf8_lossy_owned)]
681+
/// // some bytes, in a vector
682+
/// let sparkle_heart = vec![240, 159, 146, 150];
683+
///
684+
/// let sparkle_heart = String::from_utf8_lossy_owned(sparkle_heart);
685+
///
686+
/// assert_eq!(String::from("💖"), sparkle_heart);
687+
/// ```
688+
///
689+
/// Incorrect bytes:
690+
///
691+
/// ```
692+
/// #![feature(string_from_utf8_lossy_owned)]
693+
/// // some invalid bytes
694+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
695+
/// let output = String::from_utf8_lossy_owned(input);
696+
///
697+
/// assert_eq!(String::from("Hello �World"), output);
698+
/// ```
699+
#[must_use]
700+
#[cfg(not(no_global_oom_handling))]
701+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
702+
pub fn from_utf8_lossy_owned(v: Vec<u8>) -> String {
    // `String::from_utf8_lossy` only borrows `v`, so the vector is still
    // available to be consumed in the arm where no repair was needed.
    match String::from_utf8_lossy(&v) {
        // Invalid sequences were found: the lossy conversion already built a
        // new `String` containing replacement characters, so hand it back.
        Cow::Owned(repaired) => repaired,
        // SAFETY: `String::from_utf8_lossy`'s contract ensures that if it
        // returns a `Cow::Borrowed`, the input is a valid UTF-8 string, so
        // the bytes of `v` can be adopted as a `String` without re-checking.
        Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(v) },
    }
}
714+
665715
/// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`]
666716
/// if `v` contains any invalid data.
667717
///
@@ -2012,6 +2062,30 @@ impl FromUtf8Error {
20122062
&self.bytes[..]
20132063
}
20142064

2065+
/// Converts the bytes into a `String` lossily, substituting invalid UTF-8
2066+
/// sequences with replacement characters.
2067+
///
2068+
/// See [`String::from_utf8_lossy`] for more details on replacement of
2069+
/// invalid sequences, and [`String::from_utf8_lossy_owned`] for the
2070+
/// `String` function which corresponds to this function.
2071+
///
2072+
/// # Examples
2073+
///
2074+
/// ```
2075+
/// #![feature(string_from_utf8_lossy_owned)]
2076+
/// // some invalid bytes
2077+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
2078+
/// let output = String::from_utf8(input).unwrap_or_else(|e| e.into_utf8_lossy());
2079+
///
2080+
/// assert_eq!(String::from("Hello �World"), output);
2081+
/// ```
2082+
#[must_use]
2083+
#[cfg(not(no_global_oom_handling))]
2084+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
2085+
pub fn into_utf8_lossy(self) -> String {
2086+
String::from_utf8_lossy_owned(self.bytes)
2087+
}
2088+
20152089
/// Returns the bytes that were attempted to convert to a `String`.
20162090
///
20172091
/// This method is carefully constructed to avoid allocation. It will

0 commit comments

Comments
 (0)