Skip to content

Commit 7fc8c14

Browse files
committed
auto merge of #8082 : Kimundi/rust/master, r=huonw
Also renamed bytes_iter to byte_iter to match other iterators
2 parents c124f21 + e33fca9 commit 7fc8c14

File tree

7 files changed

+592
-597
lines changed

7 files changed

+592
-597
lines changed

Diff for: src/libextra/getopts.rs

+98-2
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,6 @@ pub mod groups {
476476
use getopts::{HasArg, Long, Maybe, Multi, No, Occur, Opt, Optional, Req};
477477
use getopts::{Short, Yes};
478478

479-
use std::str;
480479
use std::vec;
481480

482481
/** one group of options, e.g., both -h and --help, along with
@@ -667,7 +666,7 @@ pub mod groups {
667666

668667
// FIXME: #5516
669668
let mut desc_rows = ~[];
670-
for str::each_split_within(desc_normalized_whitespace, 54) |substr| {
669+
for each_split_within(desc_normalized_whitespace, 54) |substr| {
671670
desc_rows.push(substr.to_owned());
672671
}
673672

@@ -683,6 +682,103 @@ pub mod groups {
683682
rows.collect::<~[~str]>().connect("\n") +
684683
"\n\n";
685684
}
685+
686+
/** Splits a string into substrings with possibly internal whitespace,
687+
* each of them at most `lim` bytes long. The substrings have leading and trailing
688+
* whitespace removed, and are only cut at whitespace boundaries.
689+
*
690+
* Note: Function was moved here from `std::str` because this module is the only place that
691+
* uses it, and because it was too specific for a general string function.
692+
*
693+
* # Failure
694+
*
695+
* Fails during iteration if the string contains a non-whitespace
696+
* sequence longer than the limit.
697+
*/
698+
priv fn each_split_within<'a>(ss: &'a str,
699+
lim: uint,
700+
it: &fn(&'a str) -> bool) -> bool {
701+
// Walks `ss` one item at a time and calls `it` with each completed substring.
// Returns `cont`: the value returned by the most recent call to `it`, or
// `true` if `it` was never called.
//
// Implemented as a small state machine:
702+
703+
enum SplitWithinState {
704+
A, // leading whitespace, initial state
705+
B, // words
706+
C, // internal and trailing whitespace
707+
}
708+
enum Whitespace {
709+
Ws, // current char is whitespace
710+
Cr // current char is not whitespace
711+
}
712+
enum LengthLimit {
713+
UnderLim, // current char makes current substring still fit in limit
714+
OverLim // current char makes current substring no longer fit in limit
715+
}
716+
717+
// Index where the substring currently being built starts.
let mut slice_start = 0;
718+
// Index where the most recently seen word starts.
let mut last_start = 0;
719+
// Index just past the most recently completed word (exclusive end of the
// substring that `slice` will emit).
let mut last_end = 0;
720+
let mut state = A;
721+
// Initially the string length; later used as the index of the synthetic
// trailing spaces fed to the machine after the real input is exhausted.
let mut fake_i = ss.len();
722+
// Shadow the parameter so the limit can be lowered below.
let mut lim = lim;
723+
724+
// Set by `slice` to whatever `it` returns; tested in the run-out loop
// below and returned to the caller.
let mut cont = true;
725+
// Emits the current substring `[slice_start, last_end)` to the callback.
let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
726+
727+
// if the limit is larger than the string, lower it to save cycles
728+
if (lim >= fake_i) {
729+
lim = fake_i;
730+
}
731+
732+
// One step of the automaton for the item `c` at index `i`; returns `cont`.
// NOTE(review): `i` is the position produced by `enumerate()` over
// `ss.iter()`, yet it is passed to `ss.slice` and compared against a limit
// derived from `ss.len()`. If `iter()` yields chars while `slice`/`len`
// work in bytes, these disagree on multi-byte input -- confirm.
let machine: &fn((uint, char)) -> bool = |(i, c)| {
733+
let whitespace = if ::std::char::is_whitespace(c) { Ws } else { Cr };
734+
// Length the current substring would have if it included index `i`.
let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
735+
736+
state = match (state, whitespace, limit) {
737+
// Skip leading whitespace; the first non-whitespace starts both a new
// substring and a new word.
(A, Ws, _) => { A }
738+
(A, Cr, _) => { slice_start = i; last_start = i; B }
739+
740+
(B, Cr, UnderLim) => { B }
741+
// The current word by itself (from `last_start` to `i`) exceeds the limit.
(B, Cr, OverLim) if (i - last_start + 1) > lim
742+
=> fail!("word starting with %? longer than limit!",
743+
ss.slice(last_start, i + 1)),
744+
// Emit everything up to the previous word end, then restart the substring
// at the word currently being read.
(B, Cr, OverLim) => { slice(); slice_start = last_start; B }
745+
// A word just ended at `i`: remember its end.
(B, Ws, UnderLim) => { last_end = i; C }
746+
(B, Ws, OverLim) => { last_end = i; slice(); A }
747+
748+
// Inside whitespace that follows a word.
(C, Cr, UnderLim) => { last_start = i; B }
749+
(C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
750+
(C, Ws, OverLim) => { slice(); A }
751+
(C, Ws, UnderLim) => { C }
752+
};
753+
754+
cont
755+
};
756+
757+
// Feed every item of the input to the machine.
// NOTE(review): presumably `advance` stops iterating once the closure
// returns false -- confirm against the iterator API of this Rust version.
ss.iter().enumerate().advance(|x| machine(x));
758+
759+
// Let the automaton 'run out' by supplying trailing whitespace
// until it is back in state A (pending substring emitted) or the callback
// has asked to stop.
760+
while cont && match state { B | C => true, A => false } {
761+
machine((fake_i, ' '));
762+
fake_i += 1;
763+
}
764+
return cont;
765+
}
766+
767+
#[test]
768+
// Exercises `each_split_within` on empty input, a single word, multi-line
// text with a small limit, and the same text with the maximum limit.
priv fn test_split_within() {
769+
// Collects every substring produced for `s` with limit `i` and compares
// them pairwise against the expected list `u`.
fn t(s: &str, i: uint, u: &[~str]) {
770+
let mut v = ~[];
771+
for each_split_within(s, i) |s| { v.push(s.to_owned()) }
772+
// NOTE(review): `zip` presumably pairs elements only up to the shorter of
// the two sequences, so a missing or extra substring would not fail this
// assert -- consider also comparing `v.len()` with `u.len()`.
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
773+
}
774+
t("", 0, []);
775+
t("", 15, []);
776+
t("hello", 15, [~"hello"]);
777+
t("\nMary had a little lamb\nLittle lamb\n", 15,
778+
[~"Mary had a", ~"little lamb", ~"Little lamb"]);
779+
t("\nMary had a little lamb\nLittle lamb\n", ::std::uint::max_value,
780+
[~"Mary had a little lamb\nLittle lamb"]);
781+
}
686782
} // end groups module
687783

688784
#[cfg(test)]

Diff for: src/libextra/time.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ impl Tm {
260260
priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
261261
fn match_str(s: &str, pos: uint, needle: &str) -> bool {
262262
let mut i = pos;
263-
for needle.bytes_iter().advance |ch| {
263+
for needle.byte_iter().advance |ch| {
264264
if s[i] != ch {
265265
return false;
266266
}

0 commit comments

Comments
 (0)