Skip to content

Commit

Permalink
Add OsStr methods for stripping and splitting str prefixes.
Browse files Browse the repository at this point in the history
* `OsStr::starts_with()` tests whether an `OsStr` has a prefix matching
  the given `Pattern`.

* `OsStr::strip_prefix()` returns the `OsStr` after removing a prefix
  matching the given `Pattern`.

* `OsStr::split_once()` splits an `OsStr` into a `(&str, &OsStr)` pair,
  where the delimiter matches a given `Pattern`.

* `OsStr::starts_with_str()` and `OsStr::strip_prefix_str()` are
  specialized variants that are implemented more efficiently than the
  `Pattern` cases.

In all cases, the prefix must be Unicode because the current `Pattern`
trait is built around the `&str` type.
  • Loading branch information
jmillikin committed May 1, 2023
1 parent 0f93dae commit 7077331
Show file tree
Hide file tree
Showing 7 changed files with 391 additions and 1 deletion.
143 changes: 143 additions & 0 deletions library/std/src/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::fmt;
use crate::hash::{Hash, Hasher};
use crate::ops;
use crate::rc::Rc;
use crate::str::pattern::Pattern;
use crate::str::FromStr;
use crate::sync::Arc;

Expand Down Expand Up @@ -1034,6 +1035,148 @@ impl OsStr {
pub fn eq_ignore_ascii_case<S: AsRef<OsStr>>(&self, other: S) -> bool {
// Borrow `other` as an `OsStr` and hand the comparison to the
// platform-specific inner slice's own `eq_ignore_ascii_case`.
self.inner.eq_ignore_ascii_case(&other.as_ref().inner)
}

/// Tests whether `pattern` matches at the very start of this `OsStr`.
///
/// Yields `true` on a match and `false` otherwise. Only the leading `str`
/// portion produced by the inner `to_str_split()` is examined; the rest of
/// the `OsStr` never takes part in the match.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// [`char`]: prim@char
/// [pattern]: crate::str::pattern
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::OsString;
///
/// let bananas = OsString::from("bananas");
///
/// assert!(bananas.starts_with("bana"));
/// assert!(!bananas.starts_with("nana"));
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pattern: P) -> bool {
    // Keep just the `str` half of the split; the trailing half is discarded.
    let unicode_prefix: &'a str = self.inner.to_str_split().0;
    unicode_prefix.starts_with(pattern)
}

/// Tests whether this `OsStr` begins with the string `prefix`.
///
/// Behaves like [`OsStr::starts_with`] for a `&str` argument, but takes
/// the prefix as a plain `str` so the check is easier to optimize to a
/// direct bitwise comparison.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::OsString;
///
/// let bananas = OsString::from("bananas");
///
/// assert!(bananas.starts_with_str("bana"));
/// assert!(!bananas.starts_with_str("nana"));
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn starts_with_str(&self, prefix: &str) -> bool {
    // The platform-specific slice performs the actual comparison.
    let platform_slice = &self.inner;
    platform_slice.starts_with_str(prefix)
}

/// Removes a leading match of `prefix` from this `OsStr`.
///
/// When the `OsStr` starts with the pattern `prefix`, the remainder after
/// the matched prefix is returned inside `Some`.
///
/// When it does not start with `prefix`, the result is `None`.
///
/// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
/// function or closure that determines if a character matches.
///
/// [`char`]: prim@char
/// [pattern]: crate::str::pattern
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foobar.strip_prefix("foo:"), Some(OsStr::new("bar")));
/// assert_eq!(foobar.strip_prefix("bar"), None);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a OsStr> {
    // The inner slice does the stripping; re-wrap whatever remains as an `OsStr`.
    self.inner.strip_prefix(prefix).map(OsStr::from_inner)
}

/// Returns this `OsStr` with the given `str` prefix removed.
///
/// If the `OsStr` starts with `prefix`, returns the remainder after the
/// prefix, wrapped in `Some`. Otherwise returns `None`.
///
/// Same as [`OsStr::strip_prefix`], but is easier to optimize to a
/// direct bitwise comparison.
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foobar.strip_prefix_str("foo:"), Some(OsStr::new("bar")));
/// assert_eq!(foobar.strip_prefix_str("bar"), None);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn strip_prefix_str(&self, prefix: &str) -> Option<&OsStr> {
    // The inner slice does the stripping; re-wrap whatever remains as an `OsStr`.
    self.inner.strip_prefix_str(prefix).map(OsStr::from_inner)
}

/// Cuts this `OsStr` in two at the first match of `delimiter`.
///
/// On success the result holds the text before the delimiter and the
/// remainder after it; the delimiter itself appears in neither half.
/// `None` is returned when no match is found.
///
/// The part before the delimiter is handed back as a `str`, because a
/// successful `Pattern` match implies everything up to the match was valid
/// Unicode.
///
/// # Examples
///
/// ```
/// #![feature(osstr_str_prefix_ops)]
///
/// use std::ffi::{OsStr, OsString};
///
/// let foo = OsString::from("foo:");
/// let foobar = OsString::from("foo:bar");
///
/// assert_eq!(foo.split_once(':'), Some(("foo", OsStr::new(""))));
/// assert_eq!(foobar.split_once(':'), Some(("foo", OsStr::new("bar"))));
/// assert_eq!(foobar.split_once('='), None);
/// ```
#[unstable(feature = "osstr_str_prefix_ops", issue = "none")]
#[must_use]
#[inline]
pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a OsStr)> {
    // The inner slice performs the split; only the tail needs re-wrapping.
    self.inner
        .split_once(delimiter)
        .map(|(head, tail)| (head, OsStr::from_inner(tail)))
}
}

#[stable(feature = "box_from_os_str", since = "1.17.0")]
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@
#![feature(needs_panic_runtime)]
#![feature(negative_impls)]
#![feature(never_type)]
#![feature(pattern)]
#![feature(platform_intrinsics)]
#![feature(prelude_import)]
#![feature(rustc_attrs)]
Expand Down
48 changes: 48 additions & 0 deletions library/std/src/sys/unix/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::fmt::Write;
use crate::mem;
use crate::rc::Rc;
use crate::str;
use crate::str::pattern::{Pattern, SearchStep, Searcher};
use crate::sync::Arc;
use crate::sys_common::{AsInner, IntoInner};

Expand Down Expand Up @@ -305,4 +306,51 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
// Compare the two underlying `inner` values via their own
// `eq_ignore_ascii_case`.
self.inner.eq_ignore_ascii_case(&other.inner)
}

/// Reports whether the raw bytes of this slice begin with the bytes of `prefix`.
#[inline]
pub fn starts_with_str(&self, prefix: &str) -> bool {
    let needle = prefix.as_bytes();
    self.inner.starts_with(needle)
}

/// Strips a leading match of `prefix` and returns the remaining bytes.
///
/// The pattern is run against the `str` half of `to_str_split()`. The
/// result is `Some` only when the searcher's first step is a match that
/// starts at byte 0; any other first step yields `None`.
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
    let (valid, _) = self.to_str_split();
    let mut searcher = prefix.into_searcher(valid);

    if let SearchStep::Match(0, matched_len) = searcher.next() {
        // SAFETY: `valid` is guaranteed to be a prefix of `self.inner`,
        // and `Searcher` is known to return valid indices.
        let rest = unsafe { self.inner.get_unchecked(matched_len..) };
        Some(Slice::from_u8_slice(rest))
    } else {
        None
    }
}

/// Strips the bytes of `prefix` from the front of this slice.
///
/// Returns `Some` with the remaining bytes when the slice begins with
/// `prefix` byte-for-byte, and `None` otherwise.
#[inline]
pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
    // `<[u8]>::strip_prefix` does the prefix check and the slicing in one
    // safe call, so no `unsafe` unchecked indexing is needed here.
    self.inner.strip_prefix(prefix.as_bytes()).map(Slice::from_u8_slice)
}

/// Splits this slice around the first match of `delimiter`.
///
/// The search runs over the `str` half of `to_str_split()`. On a match,
/// returns the text before the match as a `str` and every byte after the
/// match as a `Slice`; returns `None` when the searcher finds no match.
pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
    let (valid, _) = self.to_str_split();
    let mut searcher = delimiter.into_searcher(valid);
    let (match_start, match_end) = searcher.next_match()?;

    // SAFETY: `valid` is guaranteed to be a prefix of `self.inner`,
    // and `Searcher` is known to return valid indices.
    let (before, after) = unsafe {
        (valid.get_unchecked(..match_start), self.inner.get_unchecked(match_end..))
    };

    Some((before, Slice::from_u8_slice(after)))
}
}
61 changes: 61 additions & 0 deletions library/std/src/sys/unix/os_str/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,64 @@ fn slice_to_str_split() {
assert_eq!(&suffix.inner, b"\xFF");
}
}

#[test]
fn slice_starts_with_str() {
    // Fixture bytes: "héllô=" followed by a lone 0xFF byte, then "wørld".
    let mut buf = Buf::from_string(String::from("héllô="));
    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
    buf.push_slice(Slice::from_str("wørld"));
    let slice = buf.as_slice();

    // The second candidate skips over the 0xFF byte, so it must not match.
    for (candidate, expected) in [("héllô", true), ("héllô=wørld", false)] {
        assert_eq!(slice.starts_with_str(candidate), expected);
    }
}

#[test]
fn slice_strip_prefix() {
    // Fixture bytes: "héllô=" followed by a lone 0xFF byte, then "wørld".
    let mut buf = Buf::from_string(String::from("héllô="));
    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
    buf.push_slice(Slice::from_str("wørld"));
    let slice = buf.as_slice();

    // The 0xFF byte sits between `=` and `wørld`, so this longer prefix must not match.
    assert!(slice.strip_prefix("héllô=wørld").is_none());

    // A `char` pattern removes exactly that character's bytes.
    let after_char = slice.strip_prefix('h').map(|rest| &rest.inner);
    assert_eq!(after_char, Some(&b"\xC3\xA9ll\xC3\xB4=\xFFw\xC3\xB8rld"[..]));

    // A `&str` pattern removes the whole matched string.
    let after_str = slice.strip_prefix("héllô").map(|rest| &rest.inner);
    assert_eq!(after_str, Some(&b"=\xFFw\xC3\xB8rld"[..]));
}

#[test]
fn slice_strip_prefix_str() {
    // Fixture bytes: "héllô=" followed by a lone 0xFF byte, then "wørld".
    let mut buf = Buf::from_string(String::from("héllô="));
    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
    buf.push_slice(Slice::from_str("wørld"));
    let slice = buf.as_slice();

    // The 0xFF byte sits between `=` and `wørld`, so this longer prefix must not match.
    assert!(slice.strip_prefix_str("héllô=wørld").is_none());

    // Everything after the stripped prefix, including the 0xFF byte, is kept.
    let remainder = slice.strip_prefix_str("héllô").map(|rest| &rest.inner);
    assert_eq!(remainder, Some(&b"=\xFFw\xC3\xB8rld"[..]));
}

#[test]
fn slice_split_once() {
    // Fixture bytes: "héllô=" followed by a lone 0xFF byte, then "wørld".
    let mut buf = Buf::from_string(String::from("héllô="));
    buf.push_slice(Slice::from_u8_slice(b"\xFF"));
    buf.push_slice(Slice::from_str("wørld"));
    let slice = buf.as_slice();

    // Splitting on '=' leaves the text before it as a `str` and the raw
    // bytes after it (starting with 0xFF) as the suffix.
    let halves = slice.split_once('=').map(|(before, after)| (before, &after.inner));
    assert_eq!(halves, Some(("héllô", &b"\xFFw\xC3\xB8rld"[..])));
}
31 changes: 30 additions & 1 deletion library/std/src/sys/windows/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::collections::TryReserveError;
use crate::fmt;
use crate::mem;
use crate::rc::Rc;
use crate::str::pattern::Pattern;
use crate::sync::Arc;
use crate::sys_common::wtf8::{Wtf8, Wtf8Buf};
use crate::sys_common::{AsInner, FromInner, IntoInner};
Expand Down Expand Up @@ -160,13 +161,20 @@ impl Slice {
unsafe { mem::transmute(Wtf8::from_str(s)) }
}

/// Reinterprets a borrowed `Wtf8` as a borrowed `Slice` without copying.
///
/// NOTE(review): the pointer cast below relies on `Slice` having the same
/// layout as `Wtf8` (e.g. via `#[repr(transparent)]`); the struct definition
/// is not visible here, so confirm that before relying on it.
#[inline]
fn from_inner(inner: &Wtf8) -> &Slice {
// SAFETY: Slice is just a wrapper of Wtf8,
// therefore converting &Wtf8 to &Slice is safe.
unsafe { &*(inner as *const Wtf8 as *const Slice) }
}

/// Returns the result of calling `as_str` on the wrapped `inner` value.
///
/// NOTE(review): presumably `None` when the contents are not valid
/// Unicode; confirm against `Wtf8::as_str`.
pub fn to_str(&self) -> Option<&str> {
self.inner.as_str()
}

/// Splits this slice into a leading `str` and the remaining `Slice`.
///
/// The split is computed by the inner `Wtf8::to_str_split`; the
/// remainder is re-wrapped as a `Slice` by reference via
/// `Slice::from_inner`, because an unsized `Slice` cannot be built by value.
pub fn to_str_split(&self) -> (&str, &Slice) {
    let (prefix, suffix) = self.inner.to_str_split();
    (prefix, Slice::from_inner(suffix))
}

pub fn to_string_lossy(&self) -> Cow<'_, str> {
Expand Down Expand Up @@ -231,4 +239,25 @@ impl Slice {
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
// Compare the two wrapped `inner` values via their own
// `eq_ignore_ascii_case`.
self.inner.eq_ignore_ascii_case(&other.inner)
}

/// Reports whether this slice begins with `prefix`, as decided by the
/// wrapped `inner` value's `starts_with_str`.
#[inline]
pub fn starts_with_str(&self, prefix: &str) -> bool {
    let wtf8 = &self.inner;
    wtf8.starts_with_str(prefix)
}

/// Strips a leading match of the pattern `prefix`.
///
/// The wrapped `inner` value does the stripping; the remainder it returns
/// is re-wrapped as a `Slice`. `None` is passed through unchanged.
#[inline]
pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a Slice> {
    self.inner.strip_prefix(prefix).map(Slice::from_inner)
}

/// Strips the string `prefix` from the front of this slice.
///
/// The wrapped `inner` value does the stripping; the remainder it returns
/// is re-wrapped as a `Slice`. `None` is passed through unchanged.
#[inline]
pub fn strip_prefix_str(&self, prefix: &str) -> Option<&Slice> {
    self.inner.strip_prefix_str(prefix).map(Slice::from_inner)
}

/// Splits this slice around the first match of `delimiter`.
///
/// The wrapped `inner` value performs the split; the leading `str` is
/// returned as-is and the trailing part is re-wrapped as a `Slice`.
/// `None` is passed through unchanged.
#[inline]
pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a Slice)> {
    self.inner
        .split_once(delimiter)
        .map(|(head, tail)| (head, Slice::from_inner(tail)))
}
}
Loading

0 comments on commit 7077331

Please sign in to comment.