@@ -21,7 +21,7 @@ use char;
21
21
use fmt;
22
22
use iter:: { Map , Cloned , FusedIterator , TrustedLen , Filter } ;
23
23
use iter_private:: TrustedRandomAccess ;
24
- use slice:: { self , SliceIndex } ;
24
+ use slice:: { self , SliceIndex , Split as SliceSplit } ;
25
25
use mem;
26
26
27
27
pub mod pattern;
@@ -2722,7 +2722,10 @@ impl str {
2722
2722
/// the original string slice, separated by any amount of whitespace.
2723
2723
///
2724
2724
/// 'Whitespace' is defined according to the terms of the Unicode Derived
2725
- /// Core Property `White_Space`.
2725
+ /// Core Property `White_Space`. If you only want to split on ASCII whitespace
2726
+ /// instead, use [`split_ascii_whitespace`].
2727
+ ///
2728
+ /// [`split_ascii_whitespace`]: #method.split_ascii_whitespace
2726
2729
///
2727
2730
/// # Examples
2728
2731
///
@@ -2756,6 +2759,53 @@ impl str {
2756
2759
SplitWhitespace { inner : self . split ( IsWhitespace ) . filter ( IsNotEmpty ) }
2757
2760
}
2758
2761
2762
+ /// Split a string slice by ASCII whitespace.
2763
+ ///
2764
+ /// The iterator returned will return string slices that are sub-slices of
2765
+ /// the original string slice, separated by any amount of ASCII whitespace.
2766
+ ///
2767
+ /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
2768
+ ///
2769
+ /// [`split_whitespace`]: #method.split_whitespace
2770
+ ///
2771
+ /// # Examples
2772
+ ///
2773
+ /// Basic usage:
2774
+ ///
2775
+ /// ```
2776
+ /// #![feature(split_ascii_whitespace)]
2777
+ /// let mut iter = "A few words".split_ascii_whitespace();
2778
+ ///
2779
+ /// assert_eq!(Some("A"), iter.next());
2780
+ /// assert_eq!(Some("few"), iter.next());
2781
+ /// assert_eq!(Some("words"), iter.next());
2782
+ ///
2783
+ /// assert_eq!(None, iter.next());
2784
+ /// ```
2785
+ ///
2786
+ /// All kinds of ASCII whitespace are considered:
2787
+ ///
2788
+ /// ```
2789
+ /// let mut iter = " Mary had\ta little \n\t lamb".split_whitespace();
2790
+ /// assert_eq!(Some("Mary"), iter.next());
2791
+ /// assert_eq!(Some("had"), iter.next());
2792
+ /// assert_eq!(Some("a"), iter.next());
2793
+ /// assert_eq!(Some("little"), iter.next());
2794
+ /// assert_eq!(Some("lamb"), iter.next());
2795
+ ///
2796
+ /// assert_eq!(None, iter.next());
2797
+ /// ```
2798
+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
2799
+ #[ inline]
2800
+ pub fn split_ascii_whitespace ( & self ) -> SplitAsciiWhitespace {
2801
+ let inner = self
2802
+ . as_bytes ( )
2803
+ . split ( IsAsciiWhitespace )
2804
+ . filter ( IsNotEmpty )
2805
+ . map ( UnsafeBytesToStr ) ;
2806
+ SplitAsciiWhitespace { inner }
2807
+ }
2808
+
2759
2809
/// An iterator over the lines of a string, as string slices.
2760
2810
///
2761
2811
/// Lines are ended with either a newline (`\n`) or a carriage return with
@@ -3895,6 +3945,20 @@ pub struct SplitWhitespace<'a> {
3895
3945
inner : Filter < Split < ' a , IsWhitespace > , IsNotEmpty > ,
3896
3946
}
3897
3947
3948
+ /// An iterator over the non-ASCII-whitespace substrings of a string,
3949
+ /// separated by any amount of ASCII whitespace.
3950
+ ///
3951
+ /// This struct is created by the [`split_ascii_whitespace`] method on [`str`].
3952
+ /// See its documentation for more.
3953
+ ///
3954
+ /// [`split_ascii_whitespace`]: ../../std/primitive.str.html#method.split_ascii_whitespace
3955
+ /// [`str`]: ../../std/primitive.str.html
3956
+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
3957
+ #[ derive( Clone , Debug ) ]
3958
+ pub struct SplitAsciiWhitespace < ' a > {
3959
+ inner : Map < Filter < SliceSplit < ' a , u8 , IsAsciiWhitespace > , IsNotEmpty > , UnsafeBytesToStr > ,
3960
+ }
3961
+
3898
3962
// Unit struct passed to `str::split` by `split_whitespace` and named in the
// type of `SplitWhitespace::inner`; it is made callable through the
// `FnMut<(char, )>` impl that follows.
#[ derive( Clone ) ]
3899
3963
struct IsWhitespace ;
3900
3964
@@ -3914,37 +3978,98 @@ impl FnMut<(char, )> for IsWhitespace {
3914
3978
}
3915
3979
}
3916
3980
3981
+ #[ derive( Clone ) ]
3982
+ struct IsAsciiWhitespace ;
3983
+
3984
+ impl < ' a > FnOnce < ( & ' a u8 , ) > for IsAsciiWhitespace {
3985
+ type Output = bool ;
3986
+
3987
+ #[ inline]
3988
+ extern "rust-call" fn call_once ( mut self , arg : ( & u8 , ) ) -> bool {
3989
+ self . call_mut ( arg)
3990
+ }
3991
+ }
3992
+
3993
+ impl < ' a > FnMut < ( & ' a u8 , ) > for IsAsciiWhitespace {
3994
+ #[ inline]
3995
+ extern "rust-call" fn call_mut ( & mut self , arg : ( & u8 , ) ) -> bool {
3996
+ arg. 0 . is_ascii_whitespace ( )
3997
+ }
3998
+ }
3999
+
3917
4000
#[ derive( Clone ) ]
3918
4001
struct IsNotEmpty ;
3919
4002
3920
4003
impl < ' a , ' b > FnOnce < ( & ' a & ' b str , ) > for IsNotEmpty {
3921
4004
type Output = bool ;
3922
4005
3923
4006
#[ inline]
3924
- extern "rust-call" fn call_once ( mut self , arg : ( & & str , ) ) -> bool {
4007
+ extern "rust-call" fn call_once ( mut self , arg : ( & ' a & ' b str , ) ) -> bool {
3925
4008
self . call_mut ( arg)
3926
4009
}
3927
4010
}
3928
4011
3929
4012
impl < ' a , ' b > FnMut < ( & ' a & ' b str , ) > for IsNotEmpty {
3930
4013
#[ inline]
3931
- extern "rust-call" fn call_mut ( & mut self , arg : ( & & str , ) ) -> bool {
4014
+ extern "rust-call" fn call_mut ( & mut self , arg : ( & ' a & ' b str , ) ) -> bool {
4015
+ !arg. 0 . is_empty ( )
4016
+ }
4017
+ }
4018
+
4019
+ impl < ' a , ' b > FnOnce < ( & ' a & ' b [ u8 ] , ) > for IsNotEmpty {
4020
+ type Output = bool ;
4021
+
4022
+ #[ inline]
4023
+ extern "rust-call" fn call_once ( mut self , arg : ( & ' a & ' b [ u8 ] , ) ) -> bool {
4024
+ self . call_mut ( arg)
4025
+ }
4026
+ }
4027
+
4028
+ impl < ' a , ' b > FnMut < ( & ' a & ' b [ u8 ] , ) > for IsNotEmpty {
4029
+ #[ inline]
4030
+ extern "rust-call" fn call_mut ( & mut self , arg : ( & ' a & ' b [ u8 ] , ) ) -> bool {
3932
4031
!arg. 0 . is_empty ( )
3933
4032
}
3934
4033
}
3935
4034
4035
+ #[ derive( Clone ) ]
4036
+ struct UnsafeBytesToStr ;
4037
+
4038
+ impl < ' a > FnOnce < ( & ' a [ u8 ] , ) > for UnsafeBytesToStr {
4039
+ type Output = & ' a str ;
4040
+
4041
+ #[ inline]
4042
+ extern "rust-call" fn call_once ( mut self , arg : ( & ' a [ u8 ] , ) ) -> & ' a str {
4043
+ self . call_mut ( arg)
4044
+ }
4045
+ }
4046
+
4047
+ impl < ' a > FnMut < ( & ' a [ u8 ] , ) > for UnsafeBytesToStr {
4048
+ #[ inline]
4049
+ extern "rust-call" fn call_mut ( & mut self , arg : ( & ' a [ u8 ] , ) ) -> & ' a str {
4050
+ unsafe { from_utf8_unchecked ( arg. 0 ) }
4051
+ }
4052
+ }
4053
+
3936
4054
3937
4055
#[ stable( feature = "split_whitespace" , since = "1.1.0" ) ]
3938
4056
impl < ' a > Iterator for SplitWhitespace < ' a > {
3939
4057
type Item = & ' a str ;
3940
4058
4059
+ #[ inline]
3941
4060
fn next ( & mut self ) -> Option < & ' a str > {
3942
4061
self . inner . next ( )
3943
4062
}
4063
+
4064
+ #[ inline]
4065
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
4066
+ self . inner . size_hint ( )
4067
+ }
3944
4068
}
3945
4069
3946
4070
#[ stable( feature = "split_whitespace" , since = "1.1.0" ) ]
3947
4071
impl < ' a > DoubleEndedIterator for SplitWhitespace < ' a > {
4072
+ #[ inline]
3948
4073
fn next_back ( & mut self ) -> Option < & ' a str > {
3949
4074
self . inner . next_back ( )
3950
4075
}
@@ -3953,6 +4078,32 @@ impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
3953
4078
// Marker impl: opts `SplitWhitespace` into `FusedIterator`. The body is
// empty because there are no methods to provide.
#[ stable( feature = "fused" , since = "1.26.0" ) ]
3954
4079
impl < ' a > FusedIterator for SplitWhitespace < ' a > { }
3955
4080
4081
+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
4082
+ impl < ' a > Iterator for SplitAsciiWhitespace < ' a > {
4083
+ type Item = & ' a str ;
4084
+
4085
+ #[ inline]
4086
+ fn next ( & mut self ) -> Option < & ' a str > {
4087
+ self . inner . next ( )
4088
+ }
4089
+
4090
+ #[ inline]
4091
+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
4092
+ self . inner . size_hint ( )
4093
+ }
4094
+ }
4095
+
4096
+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
4097
+ impl < ' a > DoubleEndedIterator for SplitAsciiWhitespace < ' a > {
4098
+ #[ inline]
4099
+ fn next_back ( & mut self ) -> Option < & ' a str > {
4100
+ self . inner . next_back ( )
4101
+ }
4102
+ }
4103
+
4104
// Marker impl: opts `SplitAsciiWhitespace` into `FusedIterator`. The body is
// empty because there are no methods to provide.
+ #[ unstable( feature = "split_ascii_whitespace" , issue = "48656" ) ]
4105
+ impl < ' a > FusedIterator for SplitAsciiWhitespace < ' a > { }
4106
+
3956
4107
/// An iterator of [`u16`] over the string encoded as UTF-16.
3957
4108
///
3958
4109
/// [`u16`]: ../../std/primitive.u16.html
0 commit comments