@@ -476,7 +476,6 @@ pub mod groups {
476
476
use getopts:: { HasArg , Long , Maybe , Multi , No , Occur , Opt , Optional , Req } ;
477
477
use getopts :: { Short, Yes} ;
478
478
479
- use std:: str;
480
479
use std:: vec;
481
480
482
481
/** one group of options, e.g., both -h and --help, along with
@@ -667,7 +666,7 @@ pub mod groups {
667
666
668
667
// FIXME: #5516
669
668
let mut desc_rows = ~[ ] ;
670
- for str :: each_split_within( desc_normalized_whitespace, 54 ) |substr| {
669
+ for each_split_within( desc_normalized_whitespace, 54 ) |substr| {
671
670
desc_rows. push( substr. to_owned( ) ) ;
672
671
}
673
672
@@ -683,6 +682,103 @@ pub mod groups {
683
682
rows. collect :: < ~[ ~str ] > ( ) . connect ( "\n " ) +
684
683
"\n \n " ;
685
684
}
685
+
686
+ /** Splits a string into substrings with possibly internal whitespace,
687
+ * each of them at most `lim` bytes long. The substrings have leading and trailing
688
+ * whitespace removed, and are only cut at whitespace boundaries.
689
+ *
690
+ * Note: Function was moved here from `std::str` because this module is the only place that
691
+ * uses it, and because it was too specific for a general string function.
692
+ *
+ * #Arguments:
+ *
+ * * `ss`  - the string to split.
+ * * `lim` - maximum length of one emitted substring; lowered to `ss.len()`
+ *           when it is greater than or equal to it.
+ * * `it`  - callback invoked with each substring. Its return value is stored
+ *           and handed back to the driving loops, so returning `false`
+ *           requests that no further substrings be produced.
+ *
+ * #Return value:
+ *
+ * The value last returned by `it`, or `true` if `it` was never invoked.
+ *
693
+ * #Failure:
694
+ *
695
+ * Fails during iteration if the string contains a non-whitespace
696
+ * sequence longer than the limit.
697
+ */
698
+ priv fn each_split_within<' a>( ss: & ' a str,
699
+ lim: uint,
700
+ it: & fn ( & ' a str) -> bool) -> bool {
701
+ // Just for fun, let's write this as a state machine:
702
+
703
+ enum SplitWithinState {
704
+ A , // leading whitespace, initial state
705
+ B , // words
706
+ C , // internal and trailing whitespace
707
+ }
708
+ enum Whitespace {
709
+ Ws , // current char is whitespace
710
+ Cr // current char is not whitespace
711
+ }
712
+ enum LengthLimit {
713
+ UnderLim , // current char makes current substring still fit in limit
714
+ OverLim // current char makes current substring no longer fit in limit
715
+ }
716
+
717
+ let mut slice_start = 0 ; // index where the pending substring begins
718
+ let mut last_start = 0 ; // index where the most recent word begins
719
+ let mut last_end = 0 ; // index of the whitespace that ended the most recent word
720
+ let mut state = A ;
721
+ let mut fake_i = ss. len ( ) ; // synthetic index used once the real input is exhausted
722
+ let mut lim = lim;
723
+
724
+ let mut cont = true ; // last answer from `it`; returned to the caller at the end
725
+ // Emits the pending substring [slice_start, last_end) and records the callback result.
+ let slice: & fn ( ) = || { cont = it ( ss. slice ( slice_start, last_end) ) } ;
726
+
727
+ // if the limit is larger than the string, lower it to save cycles
728
+ if ( lim >= fake_i) {
729
+ lim = fake_i;
730
+ }
731
+
732
+ // One automaton step for the char `c` at index `i`; returns `cont`.
+ let machine: & fn ( ( uint , char ) ) -> bool = |( i, c) | {
733
+ let whitespace = if :: std:: char:: is_whitespace ( c) { Ws } else { Cr } ;
734
+ // Does the span from slice_start up to and including i still fit in lim?
+ let limit = if ( i - slice_start + 1 ) <= lim { UnderLim } else { OverLim } ;
735
+
736
+ state = match ( state, whitespace, limit) {
737
+ ( A , Ws , _) => { A } // keep skipping leading whitespace
738
+ ( A , Cr , _) => { slice_start = i; last_start = i; B } // first word of a new substring
739
+
740
+ ( B , Cr , UnderLim ) => { B } // still inside a word that fits
741
+ // The current word alone (last_start up to i) exceeds lim: it can never fit.
+ ( B , Cr , OverLim ) if ( i - last_start + 1 ) > lim
742
+ => fail ! ( "word starting with %? longer than limit!" ,
743
+ ss. slice( last_start, i + 1 ) ) ,
744
+ // Emit everything before the current word, then restart the substring at that word.
+ ( B , Cr , OverLim ) => { slice ( ) ; slice_start = last_start; B }
745
+ ( B , Ws , UnderLim ) => { last_end = i; C } // word ended, substring may still grow
746
+ ( B , Ws , OverLim ) => { last_end = i; slice ( ) ; A } // word ended exactly at the limit: emit
747
+
748
+ ( C , Cr , UnderLim ) => { last_start = i; B } // next word joins the pending substring
749
+ // Next word does not fit: emit the pending substring and start a new one here.
+ ( C , Cr , OverLim ) => { slice ( ) ; slice_start = i; last_start = i; last_end = i; B }
750
+ ( C , Ws , OverLim ) => { slice ( ) ; A } // whitespace ran past the limit: emit
751
+ ( C , Ws , UnderLim ) => { C } // more whitespace after a word
752
+ } ;
753
+
754
+ cont
755
+ } ;
756
+
757
+ // NOTE(review): `i` is the position produced by `enumerate()` over `ss.iter()`,
+ // yet it is used as a bound for `ss.slice` and compared against `ss.len()`.
+ // If those are byte offsets while the iterator yields chars, input with
+ // multi-byte characters would be sliced at the wrong places - confirm.
+ ss. iter ( ) . enumerate ( ) . advance ( |x| machine ( x) ) ;
758
+
759
+ // Let the automaton 'run out' by supplying trailing whitespace
+ // at increasing synthetic indices until it is back in state A
+ // (pending substring emitted) or the callback asked to stop.
760
+ while cont && match state { B | C => true , A => false } {
761
+ machine ( ( fake_i, ' ' ) ) ;
762
+ fake_i += 1 ;
763
+ }
764
+ return cont;
765
+ }
766
+
767
+ #[ test]
768
+ priv fn test_split_within ( ) {
769
+ fn t ( s : & str , i : uint , u : & [ ~str ] ) {
770
+ let mut v = ~[ ] ;
771
+ for each_split_within( s, i) |s| { v. push ( s. to_owned ( ) ) }
772
+ assert ! ( v. iter( ) . zip( u. iter( ) ) . all( |( a, b) | a == b) ) ;
773
+ }
774
+ t ( "" , 0 , [ ] ) ;
775
+ t ( "" , 15 , [ ] ) ;
776
+ t ( "hello" , 15 , [ ~"hello"] ) ;
777
+ t ( "\n Mary had a little lamb\n Little lamb\n " , 15 ,
778
+ [ ~"Mary had a", ~"little lamb", ~"Little lamb"] ) ;
779
+ t ( "\n Mary had a little lamb\n Little lamb\n " , :: std:: uint:: max_value,
780
+ [ ~"Mary had a little lamb\n Little lamb"] ) ;
781
+ }
686
782
} // end groups module
687
783
688
784
#[ cfg( test) ]
0 commit comments