diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 5c0541d3caf33..c9d02bd54194a 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -8,6 +8,7 @@ use crate::fmt; use crate::hash::{Hash, Hasher}; use crate::ops; use crate::rc::Rc; +use crate::str::pattern::Pattern; use crate::str::FromStr; use crate::sync::Arc; @@ -178,6 +179,34 @@ impl OsString { self.inner.into_string().map_err(|buf| OsString { inner: buf }) } + /// Splits the `OsString` into a Unicode prefix and non-Unicode suffix. + /// + /// The returned `String` is the longest prefix of the `OsString` that + /// contained valid Unicode. The returned `OsString` is the rest of the + /// original value. + /// + /// # Examples + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::OsString; + /// + /// let os_string = OsString::from("foo"); + /// let (prefix, suffix) = os_string.clone().into_string_split(); + /// + /// let mut rejoined = OsString::from(prefix); + /// rejoined.push(suffix); + /// assert_eq!(rejoined, os_string); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn into_string_split(self) -> (String, OsString) { + let (prefix, suffix) = self.inner.into_string_split(); + (prefix, OsString { inner: suffix }) + } + /// Extends the string with the given &[OsStr] slice. /// /// # Examples @@ -703,6 +732,34 @@ impl OsStr { self.inner.to_str() } + /// Splits the `OsStr` into a Unicode prefix and non-Unicode suffix. + /// + /// The returned `str` is the longest prefix of the `OsStr` that + /// contained valid Unicode. The returned `OsStr` is the rest of the + /// original value. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::{OsStr, OsString}; + /// + /// let os_str = OsStr::new("foo"); + /// let (prefix, suffix) = os_str.to_str_split(); + /// + /// let mut rejoined = OsString::from(prefix); + /// rejoined.push(suffix); + /// assert_eq!(rejoined, os_str); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn to_str_split(&self) -> (&str, &OsStr) { + let (prefix, suffix) = self.inner.to_str_split(); + (prefix, Self::from_inner(suffix)) + } + /// Converts an `OsStr` to a [Cow]<[str]>. /// /// Any non-Unicode sequences are replaced with @@ -978,6 +1035,148 @@ impl OsStr { pub fn eq_ignore_ascii_case>(&self, other: S) -> bool { self.inner.eq_ignore_ascii_case(&other.as_ref().inner) } + + /// Returns `true` if the given pattern matches a prefix of this `OsStr`. + /// + /// Returns `false` if it does not. + /// + /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a + /// function or closure that determines if a character matches. + /// + /// [`char`]: prim@char + /// [pattern]: crate::str::pattern + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::OsString; + /// + /// let bananas = OsString::from("bananas"); + /// + /// assert!(bananas.starts_with("bana")); + /// assert!(!bananas.starts_with("nana")); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool { + let (p, _) = self.inner.to_str_split(); + p.starts_with(pattern) + } + + /// Returns `true` if the given `str` matches a prefix of this `OsStr`. + /// + /// Same as [`OsStr::starts_with`], but is easier to optimize to a + /// direct bitwise comparison. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::OsString; + /// + /// let bananas = OsString::from("bananas"); + /// + /// assert!(bananas.starts_with_str("bana")); + /// assert!(!bananas.starts_with_str("nana")); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn starts_with_str(&self, prefix: &str) -> bool { + self.inner.starts_with_str(prefix) + } + + /// Returns this `OsStr` with the given prefix removed. + /// + /// If the `OsStr` starts with the pattern `prefix`, returns the substring + /// after the prefix, wrapped in `Some`. + /// + /// If the `OsStr` does not start with `prefix`, returns `None`. + /// + /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a + /// function or closure that determines if a character matches. + /// + /// [`char`]: prim@char + /// [pattern]: crate::str::pattern + /// + /// # Examples + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::{OsStr, OsString}; + /// + /// let foobar = OsString::from("foo:bar"); + /// + /// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar"))); + /// assert_eq!(foobar.strip_prefix("bar"), None); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> { + Some(OsStr::from_inner(self.inner.strip_prefix(prefix)?)) + } + + /// Returns this `OsStr` with the given prefix removed. + /// + /// Same as [`OsStr::strip_prefix`], but is easier to optimize to a + /// direct bitwise comparison. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::{OsStr, OsString}; + /// + /// let foobar = OsString::from("foo:bar"); + /// + /// assert_eq!(foobar.strip_prefix_str("foo:"), Some(OsStr::new("bar"))); + /// assert_eq!(foobar.strip_prefix_str("bar"), None); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn strip_prefix_str(&self, prefix: &str) -> Option<&OsStr> { + Some(OsStr::from_inner(self.inner.strip_prefix_str(prefix)?)) + } + + /// Splits this `OsStr` on the first occurrence of the specified delimiter, + /// returning the prefix before delimiter and suffix after delimiter. + /// + /// The prefix is returned as a `str`, because a successful `Pattern` match + /// implies its matching prefix was valid Unicode. + /// + /// # Examples + /// + /// ``` + /// #![feature(osstr_str_prefix_ops)] + /// + /// use std::ffi::{OsStr, OsString}; + /// + /// let foo = OsString::from("foo:"); + /// let foobar = OsString::from("foo:bar"); + /// + /// assert_eq!(foo.split_once(':'), Some(("foo", OsStr::new("")))); + /// assert_eq!(foobar.split_once(':'), Some(("foo", OsStr::new("bar")))); + /// assert_eq!(foobar.split_once('='), None); + /// ``` + #[unstable(feature = "osstr_str_prefix_ops", issue = "none")] + #[must_use] + #[inline] + pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a OsStr)> { + let (before, after) = self.inner.split_once(delimiter)?; + Some((before, OsStr::from_inner(after))) + } } #[stable(feature = "box_from_os_str", since = "1.17.0")] diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 318a46d1b637e..4188c8c950bdb 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -264,6 +264,7 @@ #![feature(needs_panic_runtime)] #![feature(negative_impls)] #![feature(never_type)] +#![feature(pattern)] #![feature(platform_intrinsics)] #![feature(prelude_import)] #![feature(rustc_attrs)] diff --git 
a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs index 017e2af29d4f4..051ffde8ad557 100644 --- a/library/std/src/sys/unix/os_str.rs +++ b/library/std/src/sys/unix/os_str.rs @@ -8,6 +8,7 @@ use crate::fmt::Write; use crate::mem; use crate::rc::Rc; use crate::str; +use crate::str::pattern::{Pattern, SearchStep, Searcher}; use crate::sync::Arc; use crate::sys_common::{AsInner, IntoInner}; @@ -164,6 +165,27 @@ impl Buf { String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) } + pub fn into_string_split(self) -> (String, Buf) { + let utf8_err = match str::from_utf8(&self.inner) { + Ok(_) => { + // SAFETY: If `str::from_utf8()` succeeds then the input is UTF-8. + let prefix = unsafe { String::from_utf8_unchecked(self.inner) }; + return (prefix, Buf { inner: Vec::new() }); + } + Err(err) => err, + }; + let utf8_len = utf8_err.valid_up_to(); + if utf8_len == 0 { + return (String::new(), self); + } + let mut utf8_bytes = self.inner; + let rem_bytes = utf8_bytes.split_off(utf8_len); + // SAFETY: `Utf8Error::valid_up_to()` returns an index up to which + // valid UTF-8 has been verified. + let prefix = unsafe { String::from_utf8_unchecked(utf8_bytes) }; + (prefix, Buf { inner: rem_bytes }) + } + pub fn push_slice(&mut self, s: &Slice) { self.inner.extend_from_slice(&s.inner) } @@ -205,6 +227,21 @@ impl Slice { str::from_utf8(&self.inner).ok() } + pub fn to_str_split(&self) -> (&str, &Slice) { + let utf8_err = match str::from_utf8(&self.inner) { + Ok(prefix) => return (prefix, Slice::from_u8_slice(b"")), + Err(err) => err, + }; + let utf8_len = utf8_err.valid_up_to(); + if utf8_len == 0 { + return ("", self); + } + // SAFETY: `Utf8Error::valid_up_to()` returns an index up to which + // valid UTF-8 has been verified. 
+ let prefix = unsafe { str::from_utf8_unchecked(&self.inner[..utf8_len]) }; + (prefix, Slice::from_u8_slice(&self.inner[utf8_len..])) + } + pub fn to_string_lossy(&self) -> Cow<'_, str> { String::from_utf8_lossy(&self.inner) } @@ -269,4 +306,51 @@ impl Slice { pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.inner.eq_ignore_ascii_case(&other.inner) } + + #[inline] + pub fn starts_with_str(&self, prefix: &str) -> bool { + self.inner.starts_with(prefix.as_bytes()) + } + + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> { + let (p, _) = self.to_str_split(); + let prefix_len = match prefix.into_searcher(p).next() { + SearchStep::Match(0, prefix_len) => prefix_len, + _ => return None, + }; + + // SAFETY: `p` is guaranteed to be a prefix of `self.inner`, + // and `Searcher` is known to return valid indices. + unsafe { + let suffix = self.inner.get_unchecked(prefix_len..); + Some(Slice::from_u8_slice(suffix)) + } + } + + #[inline] + pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> { + if !self.starts_with_str(prefix) { + return None; + } + + // SAFETY: `prefix` is a prefix of `self.inner`. + unsafe { + let suffix = self.inner.get_unchecked(prefix.len()..); + Some(Slice::from_u8_slice(suffix)) + } + } + + pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> { + let (p, _) = self.to_str_split(); + let (start, end) = delimiter.into_searcher(p).next_match()?; + + // SAFETY: `p` is guaranteed to be a prefix of `self.inner`, + // and `Searcher` is known to return valid indices. 
+ unsafe { + let before = p.get_unchecked(..start); + let after = self.inner.get_unchecked(end..); + + Some((before, Slice::from_u8_slice(after))) + } + } } diff --git a/library/std/src/sys/unix/os_str/tests.rs b/library/std/src/sys/unix/os_str/tests.rs index 22ba0c9235041..e55a7301a1261 100644 --- a/library/std/src/sys/unix/os_str/tests.rs +++ b/library/std/src/sys/unix/os_str/tests.rs @@ -16,3 +16,98 @@ fn display() { Slice::from_u8_slice(b"Hello\xC0\x80 There\xE6\x83 Goodbye").to_string(), ); } + +#[test] +fn buf_into_string_split() { + let mut string = Buf::from_string(String::from("héllô wørld")); + { + let (prefix, suffix) = string.clone().into_string_split(); + assert_eq!(prefix, String::from("héllô wørld")); + assert_eq!(suffix.into_inner(), Vec::new()); + } + + string.push_slice(Slice::from_u8_slice(b"\xFF")); + { + let (prefix, suffix) = string.clone().into_string_split(); + assert_eq!(prefix, String::from("héllô wørld")); + assert_eq!(suffix.into_inner(), vec![0xFF]); + } +} + +#[test] +fn slice_to_str_split() { + let mut string = Buf::from_string(String::from("héllô wørld")); + { + let (prefix, suffix) = string.as_slice().to_str_split(); + assert_eq!(prefix, "héllô wørld"); + assert_eq!(&suffix.inner, b""); + } + + string.push_slice(Slice::from_u8_slice(b"\xFF")); + { + let (prefix, suffix) = string.as_slice().to_str_split(); + assert_eq!(prefix, String::from("héllô wørld")); + assert_eq!(&suffix.inner, b"\xFF"); + } +} + +#[test] +fn slice_starts_with_str() { + let mut string = Buf::from_string(String::from("héllô=")); + string.push_slice(Slice::from_u8_slice(b"\xFF")); + string.push_slice(Slice::from_str("wørld")); + let slice = string.as_slice(); + + assert!(slice.starts_with_str("héllô")); + assert!(!slice.starts_with_str("héllô=wørld")); +} + +#[test] +fn slice_strip_prefix() { + let mut string = Buf::from_string(String::from("héllô=")); + string.push_slice(Slice::from_u8_slice(b"\xFF")); + string.push_slice(Slice::from_str("wørld")); + let slice = 
string.as_slice(); + + assert!(slice.strip_prefix("héllô=wørld").is_none()); + + { + let suffix = slice.strip_prefix('h'); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().inner, b"\xC3\xA9ll\xC3\xB4=\xFFw\xC3\xB8rld",); + } + + { + let suffix = slice.strip_prefix("héllô"); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().inner, b"=\xFFw\xC3\xB8rld"); + } +} + +#[test] +fn slice_strip_prefix_str() { + let mut string = Buf::from_string(String::from("héllô=")); + string.push_slice(Slice::from_u8_slice(b"\xFF")); + string.push_slice(Slice::from_str("wørld")); + let slice = string.as_slice(); + + assert!(slice.strip_prefix_str("héllô=wørld").is_none()); + + let suffix = slice.strip_prefix_str("héllô"); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().inner, b"=\xFFw\xC3\xB8rld"); +} + +#[test] +fn slice_split_once() { + let mut string = Buf::from_string(String::from("héllô=")); + string.push_slice(Slice::from_u8_slice(b"\xFF")); + string.push_slice(Slice::from_str("wørld")); + let slice = string.as_slice(); + + let split = slice.split_once('='); + assert!(split.is_some()); + let (prefix, suffix) = split.unwrap(); + assert_eq!(prefix, "héllô"); + assert_eq!(&suffix.inner, b"\xFFw\xC3\xB8rld"); +} diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs index 4bdd8c505ff25..b81ae42a2e255 100644 --- a/library/std/src/sys/windows/os_str.rs +++ b/library/std/src/sys/windows/os_str.rs @@ -5,6 +5,7 @@ use crate::collections::TryReserveError; use crate::fmt; use crate::mem; use crate::rc::Rc; +use crate::str::pattern::Pattern; use crate::sync::Arc; use crate::sys_common::wtf8::{Wtf8, Wtf8Buf}; use crate::sys_common::{AsInner, FromInner, IntoInner}; @@ -98,6 +99,11 @@ impl Buf { self.inner.into_string().map_err(|buf| Buf { inner: buf }) } + pub fn into_string_split(self) -> (String, Buf) { + let (prefix, suffix) = self.inner.into_string_split(); + (prefix, Buf { inner: suffix }) + } + pub fn push_slice(&mut self, 
s: &Slice) { self.inner.push_wtf8(&s.inner) } @@ -155,10 +161,22 @@ impl Slice { unsafe { mem::transmute(Wtf8::from_str(s)) } } + #[inline] + fn from_inner(inner: &Wtf8) -> &Slice { + // SAFETY: Slice is just a wrapper of Wtf8, + // therefore converting &Wtf8 to &Slice is safe. + unsafe { &*(inner as *const Wtf8 as *const Slice) } + } + pub fn to_str(&self) -> Option<&str> { self.inner.as_str() } + pub fn to_str_split(&self) -> (&str, &Slice) { + let (prefix, suffix) = self.inner.to_str_split(); + (prefix, Slice::from_inner(suffix)) + } + pub fn to_string_lossy(&self) -> Cow<'_, str> { self.inner.to_string_lossy() } @@ -221,4 +239,25 @@ impl Slice { pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.inner.eq_ignore_ascii_case(&other.inner) } + + #[inline] + pub fn starts_with_str(&self, prefix: &str) -> bool { + self.inner.starts_with_str(prefix) + } + + #[inline] + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> { + Some(Slice::from_inner(self.inner.strip_prefix(prefix)?)) + } + + #[inline] + pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> { + Some(Slice::from_inner(self.inner.strip_prefix_str(prefix)?)) + } + + #[inline] + pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> { + let (before, after) = self.inner.split_once(delimiter)?; + Some((before, Slice::from_inner(after))) + } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index bc588bdbb3ce6..d5afaac3bd861 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -31,6 +31,7 @@ use crate::ops; use crate::rc::Rc; use crate::slice; use crate::str; +use crate::str::pattern::{Pattern, SearchStep, Searcher}; use crate::sync::Arc; use crate::sys_common::AsInner; @@ -441,6 +442,42 @@ impl Wtf8Buf { } } + /// Consumes the WTF-8 string and converts it to a (UTF-8, WTF-8) pair. + /// + /// This does not copy the data. 
+ /// + /// The first element of the return value is the longest prefix of valid + /// UTF-8, with the second element being the remainder. + pub fn into_string_split(self) -> (String, Wtf8Buf) { + if self.is_known_utf8 { + // SAFETY: The inner value is known to be UTF-8. + let utf8 = unsafe { String::from_utf8_unchecked(self.bytes) }; + return (utf8, Wtf8Buf::new()); + } + + let surrogate_pos = match self.next_surrogate(0) { + None => { + // SAFETY: Well-formed WTF-8 that contains no surrogates is + // also well-formed UTF-8. + let utf8 = unsafe { String::from_utf8_unchecked(self.bytes) }; + return (utf8, Wtf8Buf::new()); + } + Some((surrogate_pos, _)) => surrogate_pos, + }; + + if surrogate_pos == 0 { + return (String::new(), self); + } + + let mut utf8_bytes = self.bytes; + let wtf8_bytes = utf8_bytes.split_off(surrogate_pos); + // SAFETY: `utf8_bytes` is a prefix of a WTF-8 value that contains no + // surrogates, and well-formed WTF-8 that contains no surrogates is + // also well-formed UTF-8. + let utf8 = unsafe { String::from_utf8_unchecked(utf8_bytes) }; + (utf8, Wtf8Buf { bytes: wtf8_bytes, is_known_utf8: false }) + } + /// Converts this `Wtf8Buf` into a boxed `Wtf8`. #[inline] pub fn into_box(self) -> Box { @@ -664,6 +701,38 @@ impl Wtf8 { } } + /// Losslessly splits a WTF-8 string into a (UTF-8, WTF-8) pair. + /// + /// This does not copy the data. + /// + /// The first element of the return value is the longest prefix of valid + /// UTF-8, with the second element being the remainder. + pub fn to_str_split(&self) -> (&str, &Wtf8) { + let surrogate_pos = match self.next_surrogate(0) { + None => { + // SAFETY: Well-formed WTF-8 that contains no surrogates is + // also well-formed UTF-8. 
+ let utf8 = unsafe { str::from_utf8_unchecked(&self.bytes) }; + return (utf8, Wtf8::from_str("")); + } + Some((surrogate_pos, _)) => surrogate_pos, + }; + + if surrogate_pos == 0 { + return ("", self); + } + + let (utf8_bytes, wtf8_bytes) = self.bytes.split_at(surrogate_pos); + // SAFETY: `utf8_bytes` is a prefix of a WTF-8 value that contains no + // surrogates, and well-formed WTF-8 that contains no surrogates is + // also well-formed UTF-8. + unsafe { + let utf8 = str::from_utf8_unchecked(utf8_bytes); + let wtf8 = Wtf8::from_bytes_unchecked(wtf8_bytes); + (utf8, wtf8) + } + } + /// Converts the WTF-8 string to potentially ill-formed UTF-16 /// and return an iterator of 16-bit code units. /// @@ -780,6 +849,52 @@ impl Wtf8 { pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { self.bytes.eq_ignore_ascii_case(&other.bytes) } + + #[inline] + pub fn starts_with_str(&self, prefix: &str) -> bool { + self.bytes.starts_with(prefix.as_bytes()) + } + + pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Wtf8> { + let (p, _) = self.to_str_split(); + let prefix_len = match prefix.into_searcher(p).next() { + SearchStep::Match(0, prefix_len) => prefix_len, + _ => return None, + }; + + // SAFETY: `p` is guaranteed to be a prefix of `self.bytes`, + // and `Searcher` is known to return valid indices. + unsafe { + let suffix = self.bytes.get_unchecked(prefix_len..); + Some(Wtf8::from_bytes_unchecked(suffix)) + } + } + + #[inline] + pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Wtf8> { + if !self.starts_with_str(prefix) { + return None; + } + + // SAFETY: `prefix` is a prefix of `self.bytes`. 
+ unsafe { + let suffix = self.bytes.get_unchecked(prefix.len()..); + Some(Wtf8::from_bytes_unchecked(suffix)) + } + } + + pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Wtf8)> { + let (p, _) = self.to_str_split(); + let (start, end) = delimiter.into_searcher(p).next_match()?; + + // SAFETY: `p` is guaranteed to be a prefix of `self.bytes`, + // and `Searcher` is known to return valid indices. + unsafe { + let before = p.get_unchecked(..start); + let after = self.bytes.get_unchecked(end..); + Some((before, Wtf8::from_bytes_unchecked(after))) + } + } } /// Returns a slice of the given string for the byte range \[`begin`..`end`). diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 1a302d646941b..8b58834eda7be 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -352,6 +352,26 @@ fn wtf8buf_into_string_lossy() { assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�")); } +#[test] +fn wtf8buf_into_string_split() { + // is_known_utf8 + let mut string = Wtf8Buf::from_str("aé"); + assert_eq!(string.clone().into_string_split(), (String::from("aé"), Wtf8Buf::new()),); + + // !is_known_utf8, next_surrogate(0).is_none() + string.push_char(' '); + string.push(CodePoint::from_u32(0xD83D).unwrap()); + string.push(CodePoint::from_u32(0xDCA9).unwrap()); + assert_eq!(string.clone().into_string_split(), (String::from("aé 💩"), Wtf8Buf::new()),); + + // !is_known_utf8, next_surrogate(0).is_some() + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!( + string.clone().into_string_split(), + (String::from("aé 💩"), Wtf8Buf::from_wide(&[0xD800])), + ); +} + #[test] fn wtf8buf_from_iterator() { fn f(values: &[u32]) -> Wtf8Buf { @@ -538,6 +558,20 @@ fn wtf8_to_string_lossy() { assert_eq!(string.to_string_lossy(), expected); } +#[test] +fn wtf8_to_str_split() { + // next_surrogate(0).is_none() + let mut string = 
Wtf8Buf::from_str("aé 💩"); + assert_eq!(string.as_slice().to_str_split(), ("aé 💩", Wtf8::from_str("")),); + + // next_surrogate(0).is_some() + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!( + string.as_slice().to_str_split(), + ("aé 💩", Wtf8Buf::from_wide(&[0xD800]).as_slice()), + ); +} + #[test] fn wtf8_display() { fn d(b: &[u8]) -> String { @@ -664,3 +698,64 @@ fn wtf8_to_owned() { assert_eq!(string.bytes, b"\xED\xA0\x80"); assert!(!string.is_known_utf8); } + +#[test] +fn wtf8_starts_with_str() { + let mut string = Wtf8Buf::from_str("héllô="); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_str("wørld"); + let slice = string.as_slice(); + + assert!(slice.starts_with_str("héllô")); + assert!(!slice.starts_with_str("héllô=wørld")); +} + +#[test] +fn wtf8_strip_prefix() { + let mut string = Wtf8Buf::from_str("héllô="); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_str("wørld"); + let slice = string.as_slice(); + + assert!(slice.strip_prefix("héllô=wørld").is_none()); + + { + let suffix = slice.strip_prefix('h'); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().bytes, b"\xC3\xA9ll\xC3\xB4=\xED\xA0\x80w\xC3\xB8rld",); + } + + { + let suffix = slice.strip_prefix("héllô"); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().bytes, b"=\xED\xA0\x80w\xC3\xB8rld"); + } +} + +#[test] +fn wtf8_strip_prefix_str() { + let mut string = Wtf8Buf::from_str("héllô="); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_str("wørld"); + let slice = string.as_slice(); + + assert!(slice.strip_prefix_str("héllô=wørld").is_none()); + + let suffix = slice.strip_prefix_str("héllô"); + assert!(suffix.is_some()); + assert_eq!(&suffix.unwrap().bytes, b"=\xED\xA0\x80w\xC3\xB8rld"); +} + +#[test] +fn wtf8_split_once() { + let mut string = Wtf8Buf::from_str("héllô="); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_str("wørld"); + let slice = string.as_slice(); + + let split = 
slice.split_once('='); + assert!(split.is_some()); + let (prefix, suffix) = split.unwrap(); + assert_eq!(prefix, "héllô"); + assert_eq!(&suffix.bytes, b"\xED\xA0\x80w\xC3\xB8rld"); +}