Skip to content

Commit

Permalink
Add ParallelString::par_split_ascii_whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
cuviper committed Jan 26, 2024
1 parent f5eb95f commit 108f4e8
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 0 deletions.
82 changes: 82 additions & 0 deletions src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ pub trait ParallelString {
///
/// As with `str::split_whitespace`, 'whitespace' is defined according to
/// the terms of the Unicode Derived Core Property `White_Space`.
/// If you only want to split on ASCII whitespace instead, use
/// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
///
/// # Examples
///
Expand All @@ -213,10 +215,65 @@ pub trait ParallelString {
/// .max_by_key(|word| word.len());
/// assert_eq!(Some("longest"), longest);
/// ```
///
/// All kinds of whitespace are considered:
///
/// ```
/// use rayon::prelude::*;
/// let words: Vec<&str> = " Mary had\ta\u{2009}little \n\t lamb"
/// .par_split_whitespace()
/// .collect();
/// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
/// ```
///
/// If the string is empty or all whitespace, the iterator yields no string slices:
///
/// ```
/// use rayon::prelude::*;
/// assert_eq!("".par_split_whitespace().count(), 0);
/// assert_eq!(" ".par_split_whitespace().count(), 0);
/// ```
fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
    // Fetch the string view first, then wrap it in the iterator type; the
    // actual splitting happens when the iterator is driven.
    let text = self.as_parallel_string();
    SplitWhitespace(text)
}

/// Returns a parallel iterator over the sub-slices of a string that are
/// separated by any amount of ASCII whitespace.
///
/// 'ASCII whitespace' is whatever [`char::is_ascii_whitespace`] accepts.
/// To split by Unicode `White_Space` instead, use
/// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
///
/// # Examples
///
/// ```
/// use rayon::prelude::*;
/// let longest = "which is the longest word?"
/// .par_split_ascii_whitespace()
/// .max_by_key(|word| word.len());
/// assert_eq!(Some("longest"), longest);
/// ```
///
/// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
///
/// ```
/// use rayon::prelude::*;
/// let words: Vec<&str> = " Mary had\ta\u{2009}little \n\t lamb"
/// .par_split_ascii_whitespace()
/// .collect();
/// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
/// ```
///
/// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
///
/// ```
/// use rayon::prelude::*;
/// assert_eq!("".par_split_ascii_whitespace().count(), 0);
/// assert_eq!(" ".par_split_ascii_whitespace().count(), 0);
/// ```
fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
    SplitAsciiWhitespace(self.as_parallel_string())
}

/// Returns a parallel iterator over substrings that match a
/// given character or predicate, similar to `str::matches`.
///
Expand Down Expand Up @@ -733,6 +790,31 @@ impl<'ch> ParallelIterator for SplitWhitespace<'ch> {

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings separated by ASCII whitespace.
///
/// This is the type returned by `ParallelString::par_split_ascii_whitespace`;
/// it wraps the borrowed string slice that is being split.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);

/// Reports whether `c` is an ASCII whitespace character, as decided by
/// [`char::is_ascii_whitespace`].
///
/// Exists as a named function so it can be handed directly to `par_split`
/// as the separator predicate.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    char::is_ascii_whitespace(&c)
}

impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
type Item = &'ch str;

fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.0
.par_split(is_ascii_whitespace)
.filter(not_empty)
.drive_unindexed(consumer)
}
}

// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over substrings that match a pattern
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
Expand Down
1 change: 1 addition & 0 deletions tests/clones.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ fn clone_str() {
check(s.par_split('\n'));
check(s.par_split_terminator('\n'));
check(s.par_split_whitespace());
check(s.par_split_ascii_whitespace());
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions tests/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ fn debug_str() {
check(s.par_split('\n'));
check(s.par_split_terminator('\n'));
check(s.par_split_whitespace());
check(s.par_split_ascii_whitespace());
}

#[test]
Expand Down
6 changes: 6 additions & 0 deletions tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ pub fn execute_strings_split() {
assert_eq!(serial, parallel);
}

for &(string, _) in &tests {
let serial: Vec<_> = string.split_ascii_whitespace().collect();
let parallel: Vec<_> = string.par_split_ascii_whitespace().collect();
assert_eq!(serial, parallel);
}

// try matching separators too!
for &(string, separator) in &tests {
let serial: Vec<_> = string.matches(separator).collect();
Expand Down

0 comments on commit 108f4e8

Please sign in to comment.