diff --git a/library/alloc/tests/lib.rs b/library/alloc/tests/lib.rs
index 8c57c804ad2dc..5d518d515c915 100644
--- a/library/alloc/tests/lib.rs
+++ b/library/alloc/tests/lib.rs
@@ -25,6 +25,7 @@
 #![feature(const_btree_new)]
 #![feature(const_default_impls)]
 #![feature(const_trait_impl)]
+#![feature(split_rinclusive)]
 
 use std::collections::hash_map::DefaultHasher;
 use std::hash::{Hash, Hasher};
diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs
index d3a87c056cfb1..60218e3a50543 100644
--- a/library/alloc/tests/str.rs
+++ b/library/alloc/tests/str.rs
@@ -1325,6 +1325,22 @@ fn test_split_char_iterator_inclusive() {
     assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]);
 }
 
+#[test]
+fn test_split_char_iterator_rinclusive() {
+    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+    // Every match opens a new piece; a leading match must not yield an empty piece.
+    let split: Vec<&str> = data.split_rinclusive('\n').collect();
+    assert_eq!(split, ["\nMäry häd ä little lämb", "\nLittle lämb", "\n"]);
+
+    // Predicate pattern: each uppercase letter starts a new piece.
+    let uppercase_separated = "SheepSharkTurtleCat";
+    let split: Vec<&str> = uppercase_separated
+        .split_rinclusive(char::is_uppercase)
+        .collect();
+    assert_eq!(split, ["Sheep", "Shark", "Turtle", "Cat"]);
+}
+
 #[test]
 fn test_split_char_iterator_inclusive_rev() {
     let data = "\nMäry häd ä little lämb\nLittle lämb\n";
@@ -1349,6 +1365,23 @@ fn test_split_char_iterator_inclusive_rev() {
     assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]);
 }
 
+#[test]
+fn test_split_char_iterator_rinclusive_rev() {
+    let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+    // Reverse iteration must produce exactly the forward pieces, back to front.
+    let split: Vec<&str> = data.split_rinclusive('\n').rev().collect();
+    assert_eq!(split, ["\n", "\nLittle lämb", "\nMäry häd ä little lämb"]);
+
+    // Same check with a predicate pattern whose first character matches.
+    let uppercase_separated = "SheepSharkTurtleCat";
+    let split: Vec<&str> = uppercase_separated
+        .split_rinclusive(char::is_uppercase)
+        .rev()
+        .collect();
+    assert_eq!(split, ["Cat", "Turtle", "Shark", "Sheep"]);
+}
+
 #[test]
 fn test_rsplit() {
     let data = "\nMäry häd ä little lämb\nLittle lämb\n";
diff --git 
a/library/core/src/str/iter.rs b/library/core/src/str/iter.rs
index 94cb81e9d41a1..04b9a5e4b0e53 100644
--- a/library/core/src/str/iter.rs
+++ b/library/core/src/str/iter.rs
@@ -564,6 +564,7 @@ pub(super) struct SplitInternal<'a, P: Pattern<'a>> {
     pub(super) end: usize,
     pub(super) matcher: P::Searcher,
     pub(super) allow_trailing_empty: bool,
+    pub(super) allow_leading_empty: bool,
     pub(super) finished: bool,
 }
 
@@ -577,6 +578,7 @@ where
             .field("end", &self.end)
             .field("matcher", &self.matcher)
             .field("allow_trailing_empty", &self.allow_trailing_empty)
+            .field("allow_leading_empty", &self.allow_leading_empty)
             .field("finished", &self.finished)
             .finish()
     }
@@ -603,6 +605,20 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
             return None;
         }
 
+        // Skip a leading empty piece when the iterator was built with
+        // `allow_leading_empty: false`; the flag is consumed on first use.
+        if !self.allow_leading_empty {
+            self.allow_leading_empty = true;
+            match self.next() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
         let haystack = self.matcher.haystack();
         match self.matcher.next_match() {
             // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries.
@@ -635,6 +651,40 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
         }
     }
 
+    /// Forward step for `SplitRInclusive`: yields the piece that ends just
+    /// before the next match, leaving the match at the head of the remainder.
+    #[inline]
+    fn next_rinclusive(&mut self) -> Option<&'a str> {
+        if self.finished {
+            return None;
+        }
+
+        if !self.allow_leading_empty {
+            self.allow_leading_empty = true;
+            match self.next_rinclusive() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        let haystack = self.matcher.haystack();
+        match self.matcher.next_match() {
+            // SAFETY: `Searcher` guarantees that `a` lies on unicode boundaries,
+            // and self.start is either the start of the original string,
+            // or `a` was assigned to it, so it also lies on unicode boundary.
+            Some((a, _)) => unsafe {
+                let elt = haystack.get_unchecked(self.start..a);
+                self.start = a;
+                Some(elt)
+            },
+            None => self.get_end(),
+        }
+    }
+
     #[inline]
     fn next_back(&mut self) -> Option<&'a str>
     where
@@ -715,6 +765,44 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
         }
     }
 
+    /// Backward step for `SplitRInclusive`: yields the piece starting at the last
+    /// remaining match. A leftover empty head (haystack starts with a match, or is
+    /// empty) is dropped so that reverse iteration mirrors `next_rinclusive`.
+    #[inline]
+    fn next_back_rinclusive(&mut self) -> Option<&'a str>
+    where
+        P::Searcher: ReverseSearcher<'a>,
+    {
+        if self.finished {
+            return None;
+        }
+
+        let haystack = self.matcher.haystack();
+        match self.matcher.next_match_back() {
+            // SAFETY: `Searcher` guarantees that `a` lies on unicode boundary,
+            // and self.end is either the end of the original string,
+            // or `a` was assigned to it, so it also lies on unicode boundary.
+            Some((a, _)) => unsafe {
+                let elt = haystack.get_unchecked(a..self.end);
+                self.end = a;
+                Some(elt)
+            },
+            None => {
+                self.finished = true;
+                // The forward direction never yields a leading empty piece, so the
+                // backward direction must not yield one as its last item either.
+                if self.start == self.end {
+                    return None;
+                }
+                // SAFETY: self.start is either the start of the original string,
+                // or a match start assigned by `next_rinclusive`; self.end is either
+                // the end of the original string, or a match start assigned above.
+                // `Searcher` guarantees match indices lie on unicode boundaries.
+                unsafe { Some(haystack.get_unchecked(self.start..self.end)) }
+            }
+        }
+    }
+
     #[inline]
     fn as_str(&self) -> &'a str {
         // `Self::get_end` doesn't change `self.start`
@@ -1376,6 +1464,78 @@ impl<'a, P: Pattern<'a>> SplitInclusive<'a, P> {
     }
 }
 
+/// An iterator over the substrings of a string,
+/// where each match of a pattern begins a new substring.
+/// Unlike `Split`, it contains the matched part as the start
+/// of each subslice - besides the first, which is the contents
+/// up until the first match.
+///
+/// This struct is created by the [`split_rinclusive`] method on [`str`].
+/// See its documentation for more.
+///
+/// [`split_rinclusive`]: str::split_rinclusive
+#[unstable(feature = "split_rinclusive", issue = "none")]
+pub struct SplitRInclusive<'a, P: Pattern<'a>>(pub(super) SplitInternal<'a, P>);
+
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a>> Iterator for SplitRInclusive<'a, P> {
+    type Item = &'a str;
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        self.0.next_rinclusive()
+    }
+}
+
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitRInclusive<'a, P> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("SplitRInclusive").field("0", &self.0).finish()
+    }
+}
+
+// FIXME(#26925) Remove in favor of `#[derive(Clone)]`
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitRInclusive<'a, P> {
+    fn clone(&self) -> Self {
+        SplitRInclusive(self.0.clone())
+    }
+}
+
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator
+    for SplitRInclusive<'a, P>
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a str> {
+        self.0.next_back_rinclusive()
+    }
+}
+
+#[unstable(feature = "split_rinclusive", issue = "none")]
+impl<'a, P: Pattern<'a>> FusedIterator for SplitRInclusive<'a, P> {}
+
+impl<'a, P: Pattern<'a>> SplitRInclusive<'a, P> {
+    /// Returns the remainder of the split string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let mut split = "Mary had a little lamb".split_rinclusive(' ');
+    /// assert_eq!(split.as_str(), "Mary had a little lamb");
+    /// split.next();
+    /// assert_eq!(split.as_str(), " had a little lamb");
+    /// split.by_ref().for_each(drop);
+    /// assert_eq!(split.as_str(), "");
+    /// ```
+    #[inline]
+    #[unstable(feature = "split_rinclusive", issue = "none")]
+    pub fn as_str(&self) -> &'a str {
+        self.0.as_str()
+    }
+}
+
 /// An 
iterator of [`u16`] over the string encoded as UTF-16.
 ///
 /// This struct is created by the [`encode_utf16`] method on [`str`].
diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs
index 607a0179ff4b9..b634beecc9e49 100644
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@@ -68,6 +68,9 @@ pub use iter::SplitAsciiWhitespace;
 #[stable(feature = "split_inclusive", since = "1.51.0")]
 pub use iter::SplitInclusive;
 
+#[unstable(feature = "split_rinclusive", issue = "none")]
+pub use iter::SplitRInclusive;
+
 #[unstable(feature = "str_internals", issue = "none")]
 pub use validations::{next_code_point, utf8_char_width};
 
@@ -1230,6 +1233,7 @@ impl str {
             end: self.len(),
             matcher: pat.into_searcher(self),
             allow_trailing_empty: true,
+            allow_leading_empty: true,
             finished: false,
         })
     }
@@ -1270,6 +1274,62 @@ impl str {
             end: self.len(),
             matcher: pat.into_searcher(self),
             allow_trailing_empty: false,
+            allow_leading_empty: true,
+            finished: false,
+        })
+    }
+
+    /// An iterator over substrings of this string slice, separated by
+    /// characters matched by a pattern. Differs from the iterator produced by
+    /// `split` in that `split_rinclusive` leaves the matched part as the
+    /// beginning of the next substring. The first substring is whatever
+    /// precedes the first match.
+    ///
+    /// Put another way, a match is the start of a new substring.
+    ///
+    /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
+    /// function or closure that determines if a character matches.
+    ///
+    /// [`char`]: prim@char
+    /// [pattern]: self::pattern
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
+    ///     .split_rinclusive('\n').collect();
+    /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb."]);
+    /// ```
+    ///
+    /// If the first element of the string is matched,
+    /// the leading empty string is omitted.
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let v: Vec<&str> = "MaryHadALittleLamb"
+    ///     .split_rinclusive(char::is_uppercase).collect();
+    /// assert_eq!(v, ["Mary", "Had", "A", "Little", "Lamb"]);
+    /// ```
+    ///
+    /// If the last element of the string is matched,
+    /// that element will be considered the final substring returned by the iterator.
+    ///
+    /// ```
+    /// #![feature(split_rinclusive)]
+    /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
+    ///     .split_rinclusive('\n').collect();
+    /// assert_eq!(v, ["Mary had a little lamb", "\nlittle lamb", "\nlittle lamb.", "\n"]);
+    /// ```
+    #[unstable(feature = "split_rinclusive", issue = "none")]
+    #[inline]
+    pub fn split_rinclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitRInclusive<'a, P> {
+        SplitRInclusive(SplitInternal {
+            start: 0,
+            end: self.len(),
+            matcher: pat.into_searcher(self),
+            allow_trailing_empty: false,
+            allow_leading_empty: false,
             finished: false,
         })
     }