Skip to content

Commit

Permalink
BREAKING: put 'ByteSlice::fields' behind 'unicode' feature
Browse files Browse the repository at this point in the history
It currently uses 'char::is_whitespace', but this is more of an
implementation detail. While 'char::is_whitespace' is available in
'core', it's plausible that we might use our own data some day. In
particular, 'trim' already uses its own data.

I believe this is the only routine that makes direct use of some kind of
Unicode data that wasn't previously gated behind the 'unicode' feature.

Ref #40
  • Loading branch information
BurntSushi committed Jul 5, 2022
1 parent 3256d07 commit 1d92c84
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
12 changes: 10 additions & 2 deletions src/ext_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1053,8 +1053,9 @@ pub trait ByteSlice: Sealed {
byteset::rfind_not(self.as_bytes(), byteset.as_ref())
}

/// Returns an iterator over the fields in a byte string, separated by
/// contiguous whitespace.
/// Returns an iterator over the fields in a byte string, separated
/// by contiguous whitespace (according to the Unicode property
/// `White_Space`).
///
/// # Example
///
Expand All @@ -1075,6 +1076,7 @@ pub trait ByteSlice: Sealed {
///
/// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn fields(&self) -> Fields<'_> {
Fields::new(self.as_bytes())
Expand Down Expand Up @@ -3356,21 +3358,27 @@ impl<'a> iter::FusedIterator for Bytes<'a> {}

/// An iterator over the fields in a byte string, separated by whitespace.
///
/// Whitespace for this iterator is defined by the Unicode property
/// `White_Space`.
///
/// This iterator splits on contiguous runs of whitespace, such that the fields
/// in `foo\t\t\n \nbar` are `foo` and `bar`.
///
/// This type is only available when the `unicode` feature is enabled. It is
/// the iterator returned by `ByteSlice::fields`.
///
/// `'a` is the lifetime of the byte string being split.
#[cfg(feature = "unicode")]
#[derive(Debug)]
pub struct Fields<'a> {
// The predicate-driven field iterator that does the actual splitting. The
// predicate is stored as a plain function pointer (`fn(char) -> bool`) so
// that this field's type can be written out; a closure type has no name.
it: FieldsWith<'a, fn(char) -> bool>,
}

#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
    /// Builds a `Fields` iterator over `bytes`, treating every `char` for
    /// which `char::is_whitespace` returns `true` as a field separator.
    fn new(bytes: &'a [u8]) -> Fields<'a> {
        // Bind the predicate as a plain function pointer up front so it
        // matches the `fn(char) -> bool` parameter of the wrapped iterator.
        let is_space: fn(char) -> bool = |c| c.is_whitespace();
        let it = bytes.fields_with(is_space);
        Fields { it }
    }
}

#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
type Item = &'a [u8];

Expand Down
7 changes: 4 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,10 +379,11 @@ extern crate alloc;
pub use crate::bstr::BStr;
#[cfg(feature = "alloc")]
pub use crate::bstring::BString;
#[cfg(feature = "unicode")]
pub use crate::ext_slice::Fields;
pub use crate::ext_slice::{
ByteSlice, Bytes, Fields, FieldsWith, Find, FindReverse, Finder,
FinderReverse, Lines, LinesWithTerminator, Split, SplitN, SplitNReverse,
SplitReverse, B,
ByteSlice, Bytes, FieldsWith, Find, FindReverse, Finder, FinderReverse,
Lines, LinesWithTerminator, Split, SplitN, SplitNReverse, SplitReverse, B,
};
#[cfg(feature = "alloc")]
pub use crate::ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error};
Expand Down

0 comments on commit 1d92c84

Please sign in to comment.