From e4808afe8eb97567b6ba457337fbdd321336251e Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Fri, 1 Dec 2023 18:58:20 -0500 Subject: [PATCH] Add ASCII whitespace trimming functions to `&str` Add `trim_ascii_start`, `trim_ascii_end`, and `trim_ascii` functions to `&str` for trimming ASCII whitespace under the `byte_slice_trim_ascii` feature gate. Add `inline` to `[u8]` `trim_ascii` functions --- library/core/src/slice/ascii.rs | 3 ++ library/core/src/str/mod.rs | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index 4cfccd2e3ce0f..ce04a9f40898e 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -125,6 +125,7 @@ impl [u8] { /// assert_eq!(b"".trim_ascii_start(), b""); /// ``` #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")] + #[inline] pub const fn trim_ascii_start(&self) -> &[u8] { let mut bytes = self; // Note: A pattern matching based approach (instead of indexing) allows @@ -154,6 +155,7 @@ impl [u8] { /// assert_eq!(b"".trim_ascii_end(), b""); /// ``` #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")] + #[inline] pub const fn trim_ascii_end(&self) -> &[u8] { let mut bytes = self; // Note: A pattern matching based approach (instead of indexing) allows @@ -184,6 +186,7 @@ impl [u8] { /// assert_eq!(b"".trim_ascii(), b""); /// ``` #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")] + #[inline] pub const fn trim_ascii(&self) -> &[u8] { self.trim_ascii_start().trim_ascii_end() } diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index 27178328be5c1..a22c46edce254 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -2423,6 +2423,85 @@ impl str { me.make_ascii_lowercase() } + /// Returns a string slice with leading ASCII whitespace removed. + /// + /// 'Whitespace' refers to the definition used by + /// [`u8::is_ascii_whitespace`]. + /// + /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace + /// + /// # Examples + /// + /// ``` + /// #![feature(byte_slice_trim_ascii)] + /// + /// assert_eq!(" \t \u{3000}hello world\n".trim_ascii_start(), "\u{3000}hello world\n"); + /// assert_eq!(" ".trim_ascii_start(), ""); + /// assert_eq!("".trim_ascii_start(), ""); + /// ``` + #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")] + #[must_use = "this returns the trimmed string as a new slice, \ + without modifying the original"] + #[inline] + pub const fn trim_ascii_start(&self) -> &str { + // SAFETY: Removing ASCII characters from a `&str` does not invalidate + // UTF-8. + unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_start()) } + } + + /// Returns a string slice with trailing ASCII whitespace removed. + /// + /// 'Whitespace' refers to the definition used by + /// [`u8::is_ascii_whitespace`]. + /// + /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace + /// + /// # Examples + /// + /// ``` + /// #![feature(byte_slice_trim_ascii)] + /// + /// assert_eq!("\r hello world\u{3000}\n ".trim_ascii_end(), "\r hello world\u{3000}"); + /// assert_eq!(" ".trim_ascii_end(), ""); + /// assert_eq!("".trim_ascii_end(), ""); + /// ``` + #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")] + #[must_use = "this returns the trimmed string as a new slice, \ + without modifying the original"] + #[inline] + pub const fn trim_ascii_end(&self) -> &str { + // SAFETY: Removing ASCII characters from a `&str` does not invalidate + // UTF-8. + unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii_end()) } + } + + /// Returns a string slice with leading and trailing ASCII whitespace + /// removed. + /// + /// 'Whitespace' refers to the definition used by + /// [`u8::is_ascii_whitespace`]. + /// + /// [`u8::is_ascii_whitespace`]: u8::is_ascii_whitespace + /// + /// # Examples + /// + /// ``` + /// #![feature(byte_slice_trim_ascii)] + /// + /// assert_eq!("\r hello world\n ".trim_ascii(), "hello world"); + /// assert_eq!(" ".trim_ascii(), ""); + /// assert_eq!("".trim_ascii(), ""); + /// ``` + #[unstable(feature = "byte_slice_trim_ascii", issue = "94035")] + #[must_use = "this returns the trimmed string as a new slice, \ + without modifying the original"] + #[inline] + pub const fn trim_ascii(&self) -> &str { + // SAFETY: Removing ASCII characters from a `&str` does not invalidate + // UTF-8. + unsafe { core::str::from_utf8_unchecked(self.as_bytes().trim_ascii()) } + } + /// Return an iterator that escapes each char in `self` with [`char::escape_debug`]. /// /// Note: only extended grapheme codepoints that begin the string will be