diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index eb411e9aad99f..c9d02bd54194a 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -8,6 +8,7 @@ use crate::fmt;
 use crate::hash::{Hash, Hasher};
 use crate::ops;
 use crate::rc::Rc;
+use crate::str::pattern::Pattern;
 use crate::str::FromStr;
 use crate::sync::Arc;
 
@@ -1034,6 +1035,148 @@ impl OsStr {
     pub fn eq_ignore_ascii_case<S: AsRef<OsStr>>(&self, other: S) -> bool {
         self.inner.eq_ignore_ascii_case(&other.as_ref().inner)
     }
+
+    /// Returns `true` if the given pattern matches a prefix of this `OsStr`.
+    ///
+    /// Returns `false` if it does not.
+    ///
+    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
+    /// function or closure that determines if a character matches.
+    ///
+    /// [`char`]: prim@char
+    /// [pattern]: crate::str::pattern
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::OsString;
+    ///
+    /// let bananas = OsString::from("bananas");
+    ///
+    /// assert!(bananas.starts_with("bana"));
+    /// assert!(!bananas.starts_with("nana"));
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
+        let (p, _) = self.inner.to_str_split();
+        p.starts_with(pattern)
+    }
+
+    /// Returns `true` if the given `str` matches a prefix of this `OsStr`.
+    ///
+    /// Same as [`OsStr::starts_with`], but is easier to optimize to a
+    /// direct bitwise comparison.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::OsString;
+    ///
+    /// let bananas = OsString::from("bananas");
+    ///
+    /// assert!(bananas.starts_with_str("bana"));
+    /// assert!(!bananas.starts_with_str("nana"));
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        self.inner.starts_with_str(prefix)
+    }
+
+    /// Returns this `OsStr` with the given prefix removed.
+    ///
+    /// If the `OsStr` starts with the pattern `prefix`, returns the substring
+    /// after the prefix, wrapped in `Some`.
+    ///
+    /// If the `OsStr` does not start with `prefix`, returns `None`.
+    ///
+    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
+    /// function or closure that determines if a character matches.
+    ///
+    /// [`char`]: prim@char
+    /// [pattern]: crate::str::pattern
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let foobar = OsString::from("foo:bar");
+    ///
+    /// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar")));
+    /// assert_eq!(foobar.strip_prefix("bar"), None);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> {
+        Some(OsStr::from_inner(self.inner.strip_prefix(prefix)?))
+    }
+
+    /// Returns this `OsStr` with the given prefix removed.
+    ///
+    /// Same as [`OsStr::strip_prefix`], but is easier to optimize to a
+    /// direct bitwise comparison.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let foobar = OsString::from("foo:bar");
+    ///
+    /// assert_eq!(foobar.strip_prefix_str("foo:"), Some(OsStr::new("bar")));
+    /// assert_eq!(foobar.strip_prefix_str("bar"), None);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&OsStr> {
+        Some(OsStr::from_inner(self.inner.strip_prefix_str(prefix)?))
+    }
+
+    /// Splits this `OsStr` on the first occurrence of the specified delimiter,
+    /// returning the prefix before delimiter and suffix after delimiter.
+    ///
+    /// The prefix is returned as a `str`, because a successful `Pattern` match
+    /// implies its matching prefix was valid Unicode.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(osstr_str_prefix_ops)]
+    ///
+    /// use std::ffi::{OsStr, OsString};
+    ///
+    /// let foo = OsString::from("foo:");
+    /// let foobar = OsString::from("foo:bar");
+    ///
+    /// assert_eq!(foo.split_once(':'), Some(("foo", OsStr::new(""))));
+    /// assert_eq!(foobar.split_once(':'), Some(("foo", OsStr::new("bar"))));
+    /// assert_eq!(foobar.split_once('='), None);
+    /// ```
+    #[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
+    #[must_use]
+    #[inline]
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a OsStr)> {
+        let (before, after) = self.inner.split_once(delimiter)?;
+        Some((before, OsStr::from_inner(after)))
+    }
 }
 
 #[stable(feature = "box_from_os_str", since = "1.17.0")]
diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs
index 318a46d1b637e..4188c8c950bdb 100644
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@@ -264,6 +264,7 @@
 #![feature(needs_panic_runtime)]
 #![feature(negative_impls)]
 #![feature(never_type)]
+#![feature(pattern)]
 #![feature(platform_intrinsics)]
 #![feature(prelude_import)]
 #![feature(rustc_attrs)]
diff --git
a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs
index 026a8cc6b3001..051ffde8ad557 100644
--- a/library/std/src/sys/unix/os_str.rs
+++ b/library/std/src/sys/unix/os_str.rs
@@ -8,6 +8,7 @@ use crate::fmt::Write;
 use crate::mem;
 use crate::rc::Rc;
 use crate::str;
+use crate::str::pattern::{Pattern, SearchStep, Searcher};
 use crate::sync::Arc;
 use crate::sys_common::{AsInner, IntoInner};
 
@@ -305,4 +306,51 @@ impl Slice {
     pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
         self.inner.eq_ignore_ascii_case(&other.inner)
     }
+
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        self.inner.starts_with(prefix.as_bytes())
+    }
+
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
+        let (p, _) = self.to_str_split();
+        let prefix_len = match prefix.into_searcher(p).next() {
+            SearchStep::Match(0, prefix_len) => prefix_len,
+            _ => return None,
+        };
+
+        // SAFETY: `p` is guaranteed to be a prefix of `self.inner`,
+        // and `Searcher` is known to return valid indices.
+        unsafe {
+            let suffix = self.inner.get_unchecked(prefix_len..);
+            Some(Slice::from_u8_slice(suffix))
+        }
+    }
+
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
+        if !self.starts_with_str(prefix) {
+            return None;
+        }
+
+        // SAFETY: `prefix` is a prefix of `self.inner`.
+        unsafe {
+            let suffix = self.inner.get_unchecked(prefix.len()..);
+            Some(Slice::from_u8_slice(suffix))
+        }
+    }
+
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
+        let (p, _) = self.to_str_split();
+        let (start, end) = delimiter.into_searcher(p).next_match()?;
+
+        // SAFETY: `p` is guaranteed to be a prefix of `self.inner`,
+        // and `Searcher` is known to return valid indices.
+        unsafe {
+            let before = p.get_unchecked(..start);
+            let after = self.inner.get_unchecked(end..);
+
+            Some((before, Slice::from_u8_slice(after)))
+        }
+    }
 }
diff --git a/library/std/src/sys/unix/os_str/tests.rs b/library/std/src/sys/unix/os_str/tests.rs
index e186b7cdabd09..e55a7301a1261 100644
--- a/library/std/src/sys/unix/os_str/tests.rs
+++ b/library/std/src/sys/unix/os_str/tests.rs
@@ -50,3 +50,64 @@ fn slice_to_str_split() {
         assert_eq!(&suffix.inner, b"\xFF");
     }
 }
+
+#[test]
+fn slice_starts_with_str() {
+    let mut string = Buf::from_string(String::from("héllô="));
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    string.push_slice(Slice::from_str("wørld"));
+    let slice = string.as_slice();
+
+    assert!(slice.starts_with_str("héllô"));
+    assert!(!slice.starts_with_str("héllô=wørld"));
+}
+
+#[test]
+fn slice_strip_prefix() {
+    let mut string = Buf::from_string(String::from("héllô="));
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    string.push_slice(Slice::from_str("wørld"));
+    let slice = string.as_slice();
+
+    assert!(slice.strip_prefix("héllô=wørld").is_none());
+
+    {
+        let suffix = slice.strip_prefix('h');
+        assert!(suffix.is_some());
+        assert_eq!(&suffix.unwrap().inner, b"\xC3\xA9ll\xC3\xB4=\xFFw\xC3\xB8rld",);
+    }
+
+    {
+        let suffix = slice.strip_prefix("héllô");
+        assert!(suffix.is_some());
+        assert_eq!(&suffix.unwrap().inner, b"=\xFFw\xC3\xB8rld");
+    }
+}
+
+#[test]
+fn slice_strip_prefix_str() {
+    let mut string = Buf::from_string(String::from("héllô="));
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    string.push_slice(Slice::from_str("wørld"));
+    let slice = string.as_slice();
+
+    assert!(slice.strip_prefix_str("héllô=wørld").is_none());
+
+    let suffix = slice.strip_prefix_str("héllô");
+    assert!(suffix.is_some());
+    assert_eq!(&suffix.unwrap().inner, b"=\xFFw\xC3\xB8rld");
+}
+
+#[test]
+fn slice_split_once() {
+    let mut string = Buf::from_string(String::from("héllô="));
+    string.push_slice(Slice::from_u8_slice(b"\xFF"));
+    string.push_slice(Slice::from_str("wørld"));
+    let slice = string.as_slice();
+
+    let split = slice.split_once('=');
+    assert!(split.is_some());
+    let (prefix, suffix) = split.unwrap();
+    assert_eq!(prefix, "héllô");
+    assert_eq!(&suffix.inner, b"\xFFw\xC3\xB8rld");
+}
diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs
index 8b322e5ee1867..b81ae42a2e255 100644
--- a/library/std/src/sys/windows/os_str.rs
+++ b/library/std/src/sys/windows/os_str.rs
@@ -5,6 +5,7 @@ use crate::collections::TryReserveError;
 use crate::fmt;
 use crate::mem;
 use crate::rc::Rc;
+use crate::str::pattern::Pattern;
 use crate::sync::Arc;
 use crate::sys_common::wtf8::{Wtf8, Wtf8Buf};
 use crate::sys_common::{AsInner, FromInner, IntoInner};
@@ -160,13 +161,20 @@ impl Slice {
         unsafe { mem::transmute(Wtf8::from_str(s)) }
     }
 
+    #[inline]
+    fn from_inner(inner: &Wtf8) -> &Slice {
+        // SAFETY: Slice is just a wrapper of Wtf8,
+        // therefore converting &Wtf8 to &Slice is safe.
+        unsafe { &*(inner as *const Wtf8 as *const Slice) }
+    }
+
     pub fn to_str(&self) -> Option<&str> {
         self.inner.as_str()
     }
 
     pub fn to_str_split(&self) -> (&str, &Slice) {
         let (prefix, suffix) = self.inner.to_str_split();
-        (prefix, Slice { inner: suffix })
+        (prefix, Slice::from_inner(suffix))
     }
 
     pub fn to_string_lossy(&self) -> Cow<'_, str> {
@@ -231,4 +239,25 @@ impl Slice {
     pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
         self.inner.eq_ignore_ascii_case(&other.inner)
     }
+
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        self.inner.starts_with_str(prefix)
+    }
+
+    #[inline]
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
+        Some(Slice::from_inner(self.inner.strip_prefix(prefix)?))
+    }
+
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
+        Some(Slice::from_inner(self.inner.strip_prefix_str(prefix)?))
+    }
+
+    #[inline]
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
+        let (before, after) = self.inner.split_once(delimiter)?;
+        Some((before, Slice::from_inner(after)))
+    }
 }
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index feba676e47a90..d5afaac3bd861 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -31,6 +31,7 @@ use crate::ops;
 use crate::rc::Rc;
 use crate::slice;
 use crate::str;
+use crate::str::pattern::{Pattern, SearchStep, Searcher};
 use crate::sync::Arc;
 use crate::sys_common::AsInner;
 
@@ -848,6 +849,52 @@ impl Wtf8 {
     pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
         self.bytes.eq_ignore_ascii_case(&other.bytes)
     }
+
+    #[inline]
+    pub fn starts_with_str(&self, prefix: &str) -> bool {
+        self.bytes.starts_with(prefix.as_bytes())
+    }
+
+    pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Wtf8> {
+        let (p, _) = self.to_str_split();
+        let prefix_len = match prefix.into_searcher(p).next() {
+            SearchStep::Match(0, prefix_len) => prefix_len,
+            _ => return None,
+        };
+
+        // SAFETY: `p` is guaranteed to be a prefix of `self.bytes`,
+        // and `Searcher` is known to return valid indices.
+        unsafe {
+            let suffix = self.bytes.get_unchecked(prefix_len..);
+            Some(Wtf8::from_bytes_unchecked(suffix))
+        }
+    }
+
+    #[inline]
+    pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Wtf8> {
+        if !self.starts_with_str(prefix) {
+            return None;
+        }
+
+        // SAFETY: `prefix` is a prefix of `self.bytes`.
+        unsafe {
+            let suffix = self.bytes.get_unchecked(prefix.len()..);
+            Some(Wtf8::from_bytes_unchecked(suffix))
+        }
+    }
+
+    pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Wtf8)> {
+        let (p, _) = self.to_str_split();
+        let (start, end) = delimiter.into_searcher(p).next_match()?;
+
+        // SAFETY: `p` is guaranteed to be a prefix of `self.bytes`,
+        // and `Searcher` is known to return valid indices.
+        unsafe {
+            let before = p.get_unchecked(..start);
+            let after = self.bytes.get_unchecked(end..);
+            Some((before, Wtf8::from_bytes_unchecked(after)))
+        }
+    }
 }
 
 /// Returns a slice of the given string for the byte range \[`begin`..`end`).
diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs
index 8e71bdfcb5d15..8b58834eda7be 100644
--- a/library/std/src/sys_common/wtf8/tests.rs
+++ b/library/std/src/sys_common/wtf8/tests.rs
@@ -698,3 +698,64 @@ fn wtf8_to_owned() {
     assert_eq!(string.bytes, b"\xED\xA0\x80");
     assert!(!string.is_known_utf8);
 }
+
+#[test]
+fn wtf8_starts_with_str() {
+    let mut string = Wtf8Buf::from_str("héllô=");
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    string.push_str("wørld");
+    let slice = string.as_slice();
+
+    assert!(slice.starts_with_str("héllô"));
+    assert!(!slice.starts_with_str("héllô=wørld"));
+}
+
+#[test]
+fn wtf8_strip_prefix() {
+    let mut string = Wtf8Buf::from_str("héllô=");
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    string.push_str("wørld");
+    let slice = string.as_slice();
+
+    assert!(slice.strip_prefix("héllô=wørld").is_none());
+
+    {
+        let suffix = slice.strip_prefix('h');
+        assert!(suffix.is_some());
+        assert_eq!(&suffix.unwrap().bytes, b"\xC3\xA9ll\xC3\xB4=\xED\xA0\x80w\xC3\xB8rld",);
+    }
+
+    {
+        let suffix = slice.strip_prefix("héllô");
+        assert!(suffix.is_some());
+        assert_eq!(&suffix.unwrap().bytes, b"=\xED\xA0\x80w\xC3\xB8rld");
+    }
+}
+
+#[test]
+fn wtf8_strip_prefix_str() {
+    let mut string = Wtf8Buf::from_str("héllô=");
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    string.push_str("wørld");
+    let slice = string.as_slice();
+
+    assert!(slice.strip_prefix_str("héllô=wørld").is_none());
+
+    let suffix = slice.strip_prefix_str("héllô");
+    assert!(suffix.is_some());
+    assert_eq!(&suffix.unwrap().bytes, b"=\xED\xA0\x80w\xC3\xB8rld");
+}
+
+#[test]
+fn wtf8_split_once() {
+    let mut string = Wtf8Buf::from_str("héllô=");
+    string.push(CodePoint::from_u32(0xD800).unwrap());
+    string.push_str("wørld");
+    let slice = string.as_slice();
+
+    let split = slice.split_once('=');
+    assert!(split.is_some());
+    let (prefix, suffix) = split.unwrap();
+    assert_eq!(prefix, "héllô");
+    assert_eq!(&suffix.bytes, b"\xED\xA0\x80w\xC3\xB8rld");
+}