diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index 5c0541d3caf33..c9d02bd54194a 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -8,6 +8,7 @@ use crate::fmt;
use crate::hash::{Hash, Hasher};
use crate::ops;
use crate::rc::Rc;
+use crate::str::pattern::Pattern;
use crate::str::FromStr;
use crate::sync::Arc;
@@ -178,6 +179,34 @@ impl OsString {
self.inner.into_string().map_err(|buf| OsString { inner: buf })
}
+    /// Splits the `OsString` into its longest valid-Unicode prefix and the
+    /// remainder.
+    ///
+    /// The returned `String` holds the longest leading portion of the
+    /// `OsString` that is valid Unicode. The returned `OsString` holds
+    /// everything after that prefix; it is empty when the whole value is
+    /// valid Unicode.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::OsString;
+    ///
+    /// let os_string = OsString::from("foo");
+    /// let (prefix, suffix) = os_string.clone().into_string_split();
+    ///
+    /// let mut rejoined = OsString::from(prefix);
+    /// rejoined.push(suffix);
+    /// assert_eq!(rejoined, os_string);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn into_string_split(self) -> (String, OsString) {
+        // The platform buffer does the actual split; only re-wrap the tail.
+        let (valid, rest) = self.inner.into_string_split();
+        (valid, OsString { inner: rest })
+    }
+
/// Extends the string with the given <code>&[OsStr]</code> slice.
///
/// # Examples
@@ -703,6 +732,34 @@ impl OsStr {
self.inner.to_str()
}
+    /// Splits the `OsStr` into its longest valid-Unicode prefix and the
+    /// remainder.
+    ///
+    /// The returned `str` borrows the longest leading portion of the `OsStr`
+    /// that is valid Unicode. The returned `OsStr` borrows everything after
+    /// that prefix; it is empty when the whole value is valid Unicode.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let os_str = OsStr::new("foo");
+    /// let (prefix, suffix) = os_str.to_str_split();
+    ///
+    /// let mut rejoined = OsString::from(prefix);
+    /// rejoined.push(suffix);
+    /// assert_eq!(rejoined, os_str);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn to_str_split(&self) -> (&str, &OsStr) {
+        // The platform slice does the actual split; only re-wrap the tail.
+        let (valid, rest) = self.inner.to_str_split();
+        (valid, OsStr::from_inner(rest))
+    }
+
/// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
///
/// Any non-Unicode sequences are replaced with
@@ -978,6 +1035,148 @@ impl OsStr {
pub fn eq_ignore_ascii_case>(&self, other: S) -> bool {
self.inner.eq_ignore_ascii_case(&other.as_ref().inner)
}
+
+    /// Tests whether the given pattern matches at the start of this `OsStr`.
+    ///
+    /// Returns `true` on a match and `false` otherwise. Only the longest
+    /// valid-Unicode prefix of the `OsStr` is examined.
+    ///
+    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
+    /// function or closure that determines if a character matches.
+    ///
+    /// [`char`]: prim@char
+    /// [pattern]: crate::str::pattern
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::OsString;
+    ///
+    /// let bananas = OsString::from("bananas");
+    ///
+    /// assert!(bananas.starts_with("bana"));
+    /// assert!(!bananas.starts_with("nana"));
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
+        // A `str` pattern can only be run against the valid-Unicode prefix.
+        let valid = self.inner.to_str_split().0;
+        valid.starts_with(pattern)
+    }
+
+    /// Tests whether the given `str` is a prefix of this `OsStr`.
+    ///
+    /// A `&str`-only counterpart of [`OsStr::starts_with`] that hands the
+    /// comparison straight to the platform representation, which makes it
+    /// easier to optimize to a direct bitwise comparison.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::OsString;
+    ///
+    /// let bananas = OsString::from("bananas");
+    ///
+    /// assert!(bananas.starts_with_str("bana"));
+    /// assert!(!bananas.starts_with_str("nana"));
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        let inner = &self.inner;
+        inner.starts_with_str(prefix)
+    }
+
+    /// Removes a leading pattern match from this `OsStr`.
+    ///
+    /// If the `OsStr` starts with the pattern `prefix`, returns the remainder
+    /// after the matched prefix, wrapped in `Some`.
+    ///
+    /// If the `OsStr` does not start with `prefix`, returns `None`.
+    ///
+    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
+    /// function or closure that determines if a character matches.
+    ///
+    /// [`char`]: prim@char
+    /// [pattern]: crate::str::pattern
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let foobar = OsString::from("foo:bar");
+    ///
+    /// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar")));
+    /// assert_eq!(foobar.strip_prefix("bar"), None);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> {
+        self.inner.strip_prefix(prefix).map(OsStr::from_inner)
+    }
+
+    /// Returns this `OsStr` with the given `str` prefix removed.
+    ///
+    /// Same as [`OsStr::strip_prefix`], but is easier to optimize to a
+    /// direct bitwise comparison.
+    ///
+    /// If the `OsStr` starts with `prefix`, returns the remainder after the
+    /// prefix, wrapped in `Some`. Otherwise returns `None`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let foobar = OsString::from("foo:bar");
+    ///
+    /// assert_eq!(foobar.strip_prefix_str("foo:"), Some(OsStr::new("bar")));
+    /// assert_eq!(foobar.strip_prefix_str("bar"), None);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&OsStr> {
+        Some(OsStr::from_inner(self.inner.strip_prefix_str(prefix)?))
+    }
+
+    /// Splits this `OsStr` at the first match of the given delimiter,
+    /// returning the part before the delimiter and the part after it.
+    ///
+    /// The part before the delimiter is returned as a `str`, because a
+    /// successful `Pattern` match implies everything up to the match was
+    /// valid Unicode. Only the longest valid-Unicode prefix of the `OsStr`
+    /// is searched for the delimiter.
+    ///
+    /// Returns `None` if the delimiter is not found.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let foo = OsString::from("foo:");
+    /// let foobar = OsString::from("foo:bar");
+    ///
+    /// assert_eq!(foo.split_once(':'), Some(("foo", OsStr::new(""))));
+    /// assert_eq!(foobar.split_once(':'), Some(("foo", OsStr::new("bar"))));
+    /// assert_eq!(foobar.split_once('='), None);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a OsStr)> {
+        self.inner.split_once(delimiter).map(|(head, tail)| (head, OsStr::from_inner(tail)))
+    }
}
#[stable(feature = "box_from_os_str", since = "1.17.0")]
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 318a46d1b637e..4188c8c950bdb 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -264,6 +264,7 @@
#![feature(needs_panic_runtime)]
#![feature(negative_impls)]
#![feature(never_type)]
+#![feature(pattern)]
#![feature(platform_intrinsics)]
#![feature(prelude_import)]
#![feature(rustc_attrs)]
diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs
index 017e2af29d4f4..051ffde8ad557 100644
--- a/library/std/src/sys/unix/os_str.rs
+++ b/library/std/src/sys/unix/os_str.rs
@@ -8,6 +8,7 @@ use crate::fmt::Write;
use crate::mem;
use crate::rc::Rc;
use crate::str;
+use crate::str::pattern::{Pattern, SearchStep, Searcher};
use crate::sync::Arc;
use crate::sys_common::{AsInner, IntoInner};
@@ -164,6 +165,27 @@ impl Buf {
String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() })
}
+    /// Splits the buffer into its longest valid UTF-8 prefix and the
+    /// remaining bytes.
+    ///
+    /// The suffix is empty when the whole buffer is valid UTF-8. When the
+    /// buffer has a non-empty valid prefix followed by invalid bytes, the
+    /// prefix keeps the original allocation and the remaining bytes are
+    /// moved into a new one by `Vec::split_off`.
+    pub fn into_string_split(self) -> (String, Buf) {
+        // `String::from_utf8` returns the bytes together with the error on
+        // failure, so the fully-valid case needs no `unsafe` conversion.
+        let err = match String::from_utf8(self.inner) {
+            Ok(prefix) => return (prefix, Buf { inner: Vec::new() }),
+            Err(err) => err,
+        };
+        let utf8_len = err.utf8_error().valid_up_to();
+        let mut utf8_bytes = err.into_bytes();
+        if utf8_len == 0 {
+            // Nothing valid at the front: hand the bytes back untouched.
+            return (String::new(), Buf { inner: utf8_bytes });
+        }
+        let rem_bytes = utf8_bytes.split_off(utf8_len);
+        // SAFETY: `Utf8Error::valid_up_to()` returns an index up to which
+        // valid UTF-8 has been verified, and after `split_off` `utf8_bytes`
+        // holds exactly those bytes.
+        let prefix = unsafe { String::from_utf8_unchecked(utf8_bytes) };
+        (prefix, Buf { inner: rem_bytes })
+    }
+
pub fn push_slice(&mut self, s: &Slice) {
self.inner.extend_from_slice(&s.inner)
}
@@ -205,6 +227,21 @@ impl Slice {
str::from_utf8(&self.inner).ok()
}
+    /// Splits the slice into its longest valid UTF-8 prefix and the
+    /// remaining bytes, borrowing both from `self`.
+    ///
+    /// The suffix is empty when the whole slice is valid UTF-8.
+    pub fn to_str_split(&self) -> (&str, &Slice) {
+        let valid_len = match str::from_utf8(&self.inner) {
+            Ok(all) => return (all, Slice::from_u8_slice(b"")),
+            Err(err) => err.valid_up_to(),
+        };
+        if valid_len == 0 {
+            return ("", self);
+        }
+        let (head, tail) = self.inner.split_at(valid_len);
+        // SAFETY: `Utf8Error::valid_up_to()` returns an index up to which
+        // valid UTF-8 has been verified, and `head` is exactly that range.
+        let head = unsafe { str::from_utf8_unchecked(head) };
+        (head, Slice::from_u8_slice(tail))
+    }
+
pub fn to_string_lossy(&self) -> Cow<'_, str> {
String::from_utf8_lossy(&self.inner)
}
@@ -269,4 +306,51 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
self.inner.eq_ignore_ascii_case(&other.inner)
}
+
+    /// Returns `true` if the bytes of `prefix` are a prefix of this slice's
+    /// bytes.
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        let needle = prefix.as_bytes();
+        self.inner.starts_with(needle)
+    }
+
+    /// Returns the bytes following a pattern match at the very start of
+    /// this slice, or `None` if the pattern does not match there.
+    ///
+    /// The pattern is run against the longest valid UTF-8 prefix only.
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
+        let (valid, _) = self.to_str_split();
+        // Only a first search step that is a match anchored at offset 0
+        // counts as a prefix match.
+        if let SearchStep::Match(0, matched_len) = prefix.into_searcher(valid).next() {
+            // SAFETY: `valid` is guaranteed to be a prefix of `self.inner`,
+            // and `Searcher` is known to return valid indices.
+            let rest = unsafe { self.inner.get_unchecked(matched_len..) };
+            Some(Slice::from_u8_slice(rest))
+        } else {
+            None
+        }
+    }
+
+    /// Returns the bytes following `prefix`, or `None` if this slice's
+    /// bytes do not start with the bytes of `prefix`.
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
+        // `<[u8]>::strip_prefix` performs the comparison and the bounds-safe
+        // slicing in one step, so no unchecked indexing is needed.
+        self.inner.strip_prefix(prefix.as_bytes()).map(Slice::from_u8_slice)
+    }
+
+    /// Splits at the first match of `delimiter`, returning the text before
+    /// the match and the bytes after it, or `None` if there is no match.
+    ///
+    /// The delimiter is searched for in the longest valid UTF-8 prefix only.
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
+        let (valid, _) = self.to_str_split();
+        delimiter.into_searcher(valid).next_match().map(|(match_start, match_end)| {
+            // SAFETY: `valid` is guaranteed to be a prefix of `self.inner`,
+            // and `Searcher` is known to return valid indices.
+            let (head, tail) = unsafe {
+                (valid.get_unchecked(..match_start), self.inner.get_unchecked(match_end..))
+            };
+            (head, Slice::from_u8_slice(tail))
+        })
+    }
}
diff --git a/library/std/src/sys/unix/os_str/tests.rs b/library/std/src/sys/unix/os_str/tests.rs
index 22ba0c9235041..e55a7301a1261 100644
--- a/library/std/src/sys/unix/os_str/tests.rs
+++ b/library/std/src/sys/unix/os_str/tests.rs
@@ -16,3 +16,98 @@ fn display() {
Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(),
);
}
+
+#[test]
+fn buf_into_string_split() {
+    // Entirely valid UTF-8: everything lands in the prefix.
+    let mut string = Buf::from_string(String::from("héllô wørld"));
+    {
+        let (prefix, suffix) = string.clone().into_string_split();
+        assert_eq!(prefix, String::from("héllô wørld"));
+        assert_eq!(suffix.into_inner(), Vec::new());
+    }
+
+    // Valid prefix followed by an invalid byte.
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    {
+        let (prefix, suffix) = string.clone().into_string_split();
+        assert_eq!(prefix, String::from("héllô wørld"));
+        assert_eq!(suffix.into_inner(), vec![0xFF]);
+    }
+
+    // Invalid first byte: the prefix is empty and all bytes stay in the suffix.
+    let mut string = Buf::from_string(String::new());
+    string.push_slice(Slice::from_u8_slice(b"\xFFabc"));
+    {
+        let (prefix, suffix) = string.clone().into_string_split();
+        assert_eq!(prefix, String::new());
+        assert_eq!(suffix.into_inner(), vec![0xFF, b'a', b'b', b'c']);
+    }
+}
+
+#[test]
+fn slice_to_str_split() {
+    // Entirely valid UTF-8: everything lands in the prefix.
+    let mut string = Buf::from_string(String::from("héllô wørld"));
+    {
+        let (prefix, suffix) = string.as_slice().to_str_split();
+        assert_eq!(prefix, "héllô wørld");
+        assert_eq!(&suffix.inner, b"");
+    }
+
+    // Valid prefix followed by an invalid byte.
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    {
+        let (prefix, suffix) = string.as_slice().to_str_split();
+        assert_eq!(prefix, "héllô wørld");
+        assert_eq!(&suffix.inner, b"\xFF");
+    }
+
+    // Invalid first byte: the prefix is empty and all bytes stay in the suffix.
+    let mut string = Buf::from_string(String::new());
+    string.push_slice(Slice::from_u8_slice(b"\xFFabc"));
+    {
+        let (prefix, suffix) = string.as_slice().to_str_split();
+        assert_eq!(prefix, "");
+        assert_eq!(&suffix.inner, b"\xFFabc");
+    }
+}
+
+#[test]
+fn slice_starts_with_str() {
+    // "héllô=" + one invalid byte + "wørld"
+    let mut buf = Buf::from_string(String::from("héllô="));
+    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
+    buf.push_slice(Slice::from_str("wørld"));
+    let haystack = buf.as_slice();
+
+    assert!(haystack.starts_with_str("héllô"));
+    // The invalid byte sits between "=" and "wørld", so this must not match.
+    assert!(!haystack.starts_with_str("héllô=wørld"));
+}
+
+#[test]
+fn slice_strip_prefix() {
+    // "héllô=" + one invalid byte + "wørld"
+    let mut buf = Buf::from_string(String::from("héllô="));
+    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
+    buf.push_slice(Slice::from_str("wørld"));
+    let haystack = buf.as_slice();
+
+    assert!(haystack.strip_prefix("héllô=wørld").is_none());
+
+    // `char` pattern
+    assert_eq!(
+        haystack.strip_prefix('h').map(|rest| &rest.inner),
+        Some(&b"\xC3\xA9ll\xC3\xB4=\xFFw\xC3\xB8rld"[..]),
+    );
+
+    // `&str` pattern
+    assert_eq!(
+        haystack.strip_prefix("héllô").map(|rest| &rest.inner),
+        Some(&b"=\xFFw\xC3\xB8rld"[..]),
+    );
+}
+
+#[test]
+fn slice_strip_prefix_str() {
+    // "héllô=" + one invalid byte + "wørld"
+    let mut buf = Buf::from_string(String::from("héllô="));
+    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
+    buf.push_slice(Slice::from_str("wørld"));
+    let haystack = buf.as_slice();
+
+    assert!(haystack.strip_prefix_str("héllô=wørld").is_none());
+
+    assert_eq!(
+        haystack.strip_prefix_str("héllô").map(|rest| &rest.inner),
+        Some(&b"=\xFFw\xC3\xB8rld"[..]),
+    );
+}
+
+#[test]
+fn slice_split_once() {
+    // "héllô=" + one invalid byte + "wørld"
+    let mut string = Buf::from_string(String::from("héllô="));
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    string.push_slice(Slice::from_str("wørld"));
+    let slice = string.as_slice();
+
+    let split = slice.split_once('=');
+    assert!(split.is_some());
+    let (prefix, suffix) = split.unwrap();
+    assert_eq!(prefix, "héllô");
+    assert_eq!(&suffix.inner, b"\xFFw\xC3\xB8rld");
+
+    // A delimiter that does not occur at all.
+    assert!(slice.split_once('#').is_none());
+
+    // 'w' only occurs after the invalid byte, outside the valid UTF-8
+    // prefix that is searched.
+    assert!(slice.split_once('w').is_none());
+}
diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs
index 4bdd8c505ff25..b81ae42a2e255 100644
--- a/library/std/src/sys/windows/os_str.rs
+++ b/library/std/src/sys/windows/os_str.rs
@@ -5,6 +5,7 @@ use crate::collections::TryReserveError;
use crate::fmt;
use crate::mem;
use crate::rc::Rc;
+use crate::str::pattern::Pattern;
use crate::sync::Arc;
use crate::sys_common::wtf8::{Wtf8, Wtf8Buf};
use crate::sys_common::{AsInner, FromInner, IntoInner};
@@ -98,6 +99,11 @@ impl Buf {
self.inner.into_string().map_err(|buf| Buf { inner: buf })
}
+    /// Splits the buffer into its longest valid-Unicode prefix and the
+    /// remainder by delegating to the wrapped WTF-8 buffer.
+    pub fn into_string_split(self) -> (String, Buf) {
+        let (valid, rest) = self.inner.into_string_split();
+        (valid, Buf { inner: rest })
+    }
+
pub fn push_slice(&mut self, s: &Slice) {
self.inner.push_wtf8(&s.inner)
}
@@ -155,10 +161,22 @@ impl Slice {
unsafe { mem::transmute(Wtf8::from_str(s)) }
}
+    /// Reinterprets a `&Wtf8` as a `&Slice` without copying.
+    #[inline]
+    fn from_inner(inner: &Wtf8) -> &Slice {
+        let ptr = inner as *const Wtf8 as *const Slice;
+        // SAFETY: Slice is just a wrapper of Wtf8,
+        // therefore converting &Wtf8 to &Slice is safe.
+        // NOTE(review): this relies on `Slice` having the same layout as
+        // `Wtf8`; the struct definition is not visible here — confirm.
+        unsafe { &*ptr }
+    }
+
pub fn to_str(&self) -> Option<&str> {
self.inner.as_str()
}
+    /// Splits the slice into its longest valid-Unicode prefix and the
+    /// remainder by delegating to the wrapped WTF-8 slice.
+    pub fn to_str_split(&self) -> (&str, &Slice) {
+        let (valid, rest) = self.inner.to_str_split();
+        (valid, Slice::from_inner(rest))
+    }
+
pub fn to_string_lossy(&self) -> Cow<'_, str> {
self.inner.to_string_lossy()
}
@@ -221,4 +239,25 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
self.inner.eq_ignore_ascii_case(&other.inner)
}
+
+    /// Returns `true` if the wrapped WTF-8 slice starts with `prefix`.
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        let inner = &self.inner;
+        inner.starts_with_str(prefix)
+    }
+
+    /// Returns the remainder after a pattern match at the start of the
+    /// wrapped WTF-8 slice, or `None` if the pattern does not match there.
+    #[inline]
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
+        self.inner.strip_prefix(prefix).map(Slice::from_inner)
+    }
+
+    /// Returns the remainder after `prefix`, or `None` if the wrapped
+    /// WTF-8 slice does not start with `prefix`.
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
+        self.inner.strip_prefix_str(prefix).map(Slice::from_inner)
+    }
+
+    /// Splits the wrapped WTF-8 slice at the first match of `delimiter`,
+    /// returning the text before the match and the remainder after it, or
+    /// `None` if there is no match.
+    #[inline]
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
+        self.inner.split_once(delimiter).map(|(head, tail)| (head, Slice::from_inner(tail)))
+    }
}
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index bc588bdbb3ce6..d5afaac3bd861 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -31,6 +31,7 @@ use crate::ops;
use crate::rc::Rc;
use crate::slice;
use crate::str;
+use crate::str::pattern::{Pattern, SearchStep, Searcher};
use crate::sync::Arc;
use crate::sys_common::AsInner;
@@ -441,6 +442,42 @@ impl Wtf8Buf {
}
}
+    /// Consumes the WTF-8 string and converts it to a (UTF-8, WTF-8) pair.
+    ///
+    /// The first element of the return value is the longest prefix of valid
+    /// UTF-8, with the second element being the remainder.
+    ///
+    /// The existing allocation is reused for the prefix. When a non-empty
+    /// prefix is followed by a surrogate, the remainder is moved into a new
+    /// allocation by `Vec::split_off`; in every other case no bytes are
+    /// copied.
+    pub fn into_string_split(self) -> (String, Wtf8Buf) {
+        // A buffer already known to be UTF-8 cannot contain a surrogate, so
+        // the scan is skipped for it.
+        let surrogate_pos = if self.is_known_utf8 {
+            None
+        } else {
+            self.next_surrogate(0).map(|(pos, _)| pos)
+        };
+
+        match surrogate_pos {
+            None => {
+                // SAFETY: The bytes are either known to be UTF-8, or are
+                // well-formed WTF-8 that contains no surrogates, which is
+                // also well-formed UTF-8.
+                let utf8 = unsafe { String::from_utf8_unchecked(self.bytes) };
+                (utf8, Wtf8Buf::new())
+            }
+            // The string starts with a surrogate: there is no UTF-8 prefix.
+            Some(0) => (String::new(), self),
+            Some(pos) => {
+                let mut utf8_bytes = self.bytes;
+                let wtf8_bytes = utf8_bytes.split_off(pos);
+                // SAFETY: `utf8_bytes` is a prefix of a WTF-8 value that
+                // contains no surrogates, and well-formed WTF-8 that contains
+                // no surrogates is also well-formed UTF-8.
+                let utf8 = unsafe { String::from_utf8_unchecked(utf8_bytes) };
+                (utf8, Wtf8Buf { bytes: wtf8_bytes, is_known_utf8: false })
+            }
+        }
+    }
+
/// Converts this `Wtf8Buf` into a boxed `Wtf8`.
#[inline]
pub fn into_box(self) -> Box {
@@ -664,6 +701,38 @@ impl Wtf8 {
}
}
+    /// Losslessly splits a WTF-8 string into a (UTF-8, WTF-8) pair.
+    ///
+    /// Both halves borrow from `self`; no data is copied.
+    ///
+    /// The first element of the return value is the longest prefix of valid
+    /// UTF-8, with the second element being the remainder.
+    pub fn to_str_split(&self) -> (&str, &Wtf8) {
+        match self.next_surrogate(0) {
+            None => {
+                // SAFETY: Well-formed WTF-8 that contains no surrogates is
+                // also well-formed UTF-8.
+                let utf8 = unsafe { str::from_utf8_unchecked(&self.bytes) };
+                (utf8, Wtf8::from_str(""))
+            }
+            // The string starts with a surrogate: there is no UTF-8 prefix.
+            Some((0, _)) => ("", self),
+            Some((surrogate_pos, _)) => {
+                let (utf8_bytes, wtf8_bytes) = self.bytes.split_at(surrogate_pos);
+                // SAFETY: `utf8_bytes` is a prefix of a WTF-8 value that
+                // contains no surrogates, and well-formed WTF-8 that contains
+                // no surrogates is also well-formed UTF-8.
+                unsafe {
+                    (str::from_utf8_unchecked(utf8_bytes), Wtf8::from_bytes_unchecked(wtf8_bytes))
+                }
+            }
+        }
+    }
+
/// Converts the WTF-8 string to potentially ill-formed UTF-16
/// and return an iterator of 16-bit code units.
///
@@ -780,6 +849,52 @@ impl Wtf8 {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
self.bytes.eq_ignore_ascii_case(&other.bytes)
}
+
+    /// Returns `true` if the bytes of `prefix` are a prefix of this
+    /// string's bytes.
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        let needle = prefix.as_bytes();
+        self.bytes.starts_with(needle)
+    }
+
+    /// Returns the remainder after a pattern match at the very start of
+    /// this string, or `None` if the pattern does not match there.
+    ///
+    /// The pattern is run against the longest valid UTF-8 prefix only.
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Wtf8> {
+        let (valid, _) = self.to_str_split();
+        // Only a first search step that is a match anchored at offset 0
+        // counts as a prefix match.
+        if let SearchStep::Match(0, matched_len) = prefix.into_searcher(valid).next() {
+            // SAFETY: `valid` is guaranteed to be a prefix of `self.bytes`,
+            // and `Searcher` is known to return valid indices.
+            unsafe {
+                let rest = self.bytes.get_unchecked(matched_len..);
+                Some(Wtf8::from_bytes_unchecked(rest))
+            }
+        } else {
+            None
+        }
+    }
+
+    /// Returns the remainder after `prefix`, or `None` if this string's
+    /// bytes do not start with the bytes of `prefix`.
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Wtf8> {
+        // `<[u8]>::strip_prefix` performs the comparison and the bounds-safe
+        // slicing in one step, so no unchecked indexing is needed.
+        let suffix = self.bytes.strip_prefix(prefix.as_bytes())?;
+
+        // SAFETY: `prefix` is valid UTF-8 and `self.bytes` is well-formed
+        // WTF-8, so a byte-wise prefix match ends on a code point boundary
+        // and the bytes that follow it are still well-formed WTF-8.
+        Some(unsafe { Wtf8::from_bytes_unchecked(suffix) })
+    }
+
+    /// Splits at the first match of `delimiter`, returning the text before
+    /// the match and the remainder after it, or `None` if there is no match.
+    ///
+    /// The delimiter is searched for in the longest valid UTF-8 prefix only.
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Wtf8)> {
+        let (valid, _) = self.to_str_split();
+        delimiter.into_searcher(valid).next_match().map(|(match_start, match_end)| {
+            // SAFETY: `valid` is guaranteed to be a prefix of `self.bytes`,
+            // and `Searcher` is known to return valid indices.
+            unsafe {
+                let head = valid.get_unchecked(..match_start);
+                let tail = self.bytes.get_unchecked(match_end..);
+                (head, Wtf8::from_bytes_unchecked(tail))
+            }
+        })
+    }
}
/// Returns a slice of the given string for the byte range \[`begin`..`end`).
diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs
index 1a302d646941b..8b58834eda7be 100644
--- a/library/std/src/sys_common/wtf8/tests.rs
+++ b/library/std/src/sys_common/wtf8/tests.rs
@@ -352,6 +352,26 @@ fn wtf8buf_into_string_lossy() {
assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�"));
}
+#[test]
+fn wtf8buf_into_string_split() {
+    // is_known_utf8
+    let mut string = Wtf8Buf::from_str("aé");
+    assert_eq!(string.clone().into_string_split(), (String::from("aé"), Wtf8Buf::new()),);
+
+    // !is_known_utf8, next_surrogate(0).is_none()
+    string.push_char(' ');
+    string.push(CodePoint::from_u32(0xD83D).unwrap());
+    string.push(CodePoint::from_u32(0xDCA9).unwrap());
+    assert_eq!(string.clone().into_string_split(), (String::from("aé 💩"), Wtf8Buf::new()),);
+
+    // !is_known_utf8, next_surrogate(0).is_some()
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    assert_eq!(
+        string.clone().into_string_split(),
+        (String::from("aé 💩"), Wtf8Buf::from_wide(&[0xD800])),
+    );
+
+    // Leading surrogate: the prefix is empty and the whole value is the suffix.
+    let string = Wtf8Buf::from_wide(&[0xD800, 0x61]);
+    assert_eq!(string.clone().into_string_split(), (String::new(), string));
+}
+
#[test]
fn wtf8buf_from_iterator() {
fn f(values: &[u32]) -> Wtf8Buf {
@@ -538,6 +558,20 @@ fn wtf8_to_string_lossy() {
assert_eq!(string.to_string_lossy(), expected);
}
+#[test]
+fn wtf8_to_str_split() {
+    // next_surrogate(0).is_none()
+    let mut string = Wtf8Buf::from_str("aé 💩");
+    assert_eq!(string.as_slice().to_str_split(), ("aé 💩", Wtf8::from_str("")),);
+
+    // next_surrogate(0).is_some()
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    assert_eq!(
+        string.as_slice().to_str_split(),
+        ("aé 💩", Wtf8Buf::from_wide(&[0xD800]).as_slice()),
+    );
+
+    // Leading surrogate: the prefix is empty and the whole value is the suffix.
+    let string = Wtf8Buf::from_wide(&[0xD800, 0x61]);
+    assert_eq!(string.as_slice().to_str_split(), ("", string.as_slice()));
+}
+
#[test]
fn wtf8_display() {
fn d(b: &[u8]) -> String {
@@ -664,3 +698,64 @@ fn wtf8_to_owned() {
assert_eq!(string.bytes, b"\xED\xA0\x80");
assert!(!string.is_known_utf8);
}
+
+#[test]
+fn wtf8_starts_with_str() {
+    // "héllô=" + lone surrogate + "wørld"
+    let mut buf = Wtf8Buf::from_str("héllô=");
+    buf.push(CodePoint::from_u32(0xD800).unwrap());
+    buf.push_str("wørld");
+    let haystack = buf.as_slice();
+
+    assert!(haystack.starts_with_str("héllô"));
+    // The lone surrogate sits between "=" and "wørld", so this must not match.
+    assert!(!haystack.starts_with_str("héllô=wørld"));
+}
+
+#[test]
+fn wtf8_strip_prefix() {
+    // "héllô=" + lone surrogate + "wørld"
+    let mut buf = Wtf8Buf::from_str("héllô=");
+    buf.push(CodePoint::from_u32(0xD800).unwrap());
+    buf.push_str("wørld");
+    let haystack = buf.as_slice();
+
+    assert!(haystack.strip_prefix("héllô=wørld").is_none());
+
+    // `char` pattern
+    assert_eq!(
+        haystack.strip_prefix('h').map(|rest| &rest.bytes),
+        Some(&b"\xC3\xA9ll\xC3\xB4=\xED\xA0\x80w\xC3\xB8rld"[..]),
+    );
+
+    // `&str` pattern
+    assert_eq!(
+        haystack.strip_prefix("héllô").map(|rest| &rest.bytes),
+        Some(&b"=\xED\xA0\x80w\xC3\xB8rld"[..]),
+    );
+}
+
+#[test]
+fn wtf8_strip_prefix_str() {
+    // "héllô=" + lone surrogate + "wørld"
+    let mut buf = Wtf8Buf::from_str("héllô=");
+    buf.push(CodePoint::from_u32(0xD800).unwrap());
+    buf.push_str("wørld");
+    let haystack = buf.as_slice();
+
+    assert!(haystack.strip_prefix_str("héllô=wørld").is_none());
+
+    assert_eq!(
+        haystack.strip_prefix_str("héllô").map(|rest| &rest.bytes),
+        Some(&b"=\xED\xA0\x80w\xC3\xB8rld"[..]),
+    );
+}
+
+#[test]
+fn wtf8_split_once() {
+    // "héllô=" + lone surrogate + "wørld"
+    let mut string = Wtf8Buf::from_str("héllô=");
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    string.push_str("wørld");
+    let slice = string.as_slice();
+
+    let split = slice.split_once('=');
+    assert!(split.is_some());
+    let (prefix, suffix) = split.unwrap();
+    assert_eq!(prefix, "héllô");
+    assert_eq!(&suffix.bytes, b"\xED\xA0\x80w\xC3\xB8rld");
+
+    // A delimiter that does not occur at all.
+    assert!(slice.split_once('#').is_none());
+
+    // 'w' only occurs after the lone surrogate, outside the valid UTF-8
+    // prefix that is searched.
+    assert!(slice.split_once('w').is_none());
+}