Skip to content

libcore: handle trailing newlines more like other languages. #5398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 22, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions src/compiletest/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ fn run_debuginfo_test(config: config, props: TestProps, testfile: &Path) {
// check if each line in props.check_lines appears in the
// output (in order)
let mut i = 0u;
for str::lines(ProcRes.stdout).each |line| {
for str::lines_each(ProcRes.stdout) |line| {
if props.check_lines[i].trim() == line.trim() {
i += 1u;
}
Expand Down Expand Up @@ -297,8 +297,8 @@ fn check_error_patterns(props: TestProps,
let mut next_err_idx = 0u;
let mut next_err_pat = props.error_patterns[next_err_idx];
let mut done = false;
for str::split_char(ProcRes.stderr, '\n').each |line| {
if str::contains(*line, next_err_pat) {
for str::lines_each(ProcRes.stderr) |line| {
if str::contains(line, next_err_pat) {
debug!("found error pattern %s", next_err_pat);
next_err_idx += 1u;
if next_err_idx == vec::len(props.error_patterns) {
Expand Down Expand Up @@ -347,15 +347,15 @@ fn check_expected_errors(expected_errors: ~[errors::ExpectedError],
// filename:line1:col1: line2:col2: *warning:* msg
// where line1:col1: is the starting point, line2:col2:
// is the ending point, and * represents ANSI color codes.
for str::split_char(ProcRes.stderr, '\n').each |line| {
for str::lines_each(ProcRes.stderr) |line| {
let mut was_expected = false;
for vec::eachi(expected_errors) |i, ee| {
if !found_flags[i] {
debug!("prefix=%s ee.kind=%s ee.msg=%s line=%s",
prefixes[i], ee.kind, ee.msg, *line);
if (str::starts_with(*line, prefixes[i]) &&
str::contains(*line, ee.kind) &&
str::contains(*line, ee.msg)) {
prefixes[i], ee.kind, ee.msg, line);
if (str::starts_with(line, prefixes[i]) &&
str::contains(line, ee.kind) &&
str::contains(line, ee.msg)) {
found_flags[i] = true;
was_expected = true;
break;
Expand All @@ -364,13 +364,13 @@ fn check_expected_errors(expected_errors: ~[errors::ExpectedError],
}

// ignore this msg which gets printed at the end
if str::contains(*line, ~"aborting due to") {
if str::contains(line, ~"aborting due to") {
was_expected = true;
}

if !was_expected && is_compiler_error_or_warning(*line) {
if !was_expected && is_compiler_error_or_warning(str::from_slice(line)) {
fatal_ProcRes(fmt!("unexpected compiler error or warning: '%s'",
*line),
line),
ProcRes);
}
}
Expand Down
55 changes: 49 additions & 6 deletions src/libcore/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ pub trait ReaderUtil {
/// Read len bytes into a new vec.
fn read_bytes(&self, len: uint) -> ~[u8];

/// Read up until a specified character (which is not returned) or EOF.
fn read_until(&self, c: char) -> ~str;
/// Read up until a specified character (which is optionally included) or EOF.
fn read_until(&self, c: char, include: bool) -> ~str;

/// Read up until the first '\n' char (which is not returned), or EOF.
fn read_line(&self) -> ~str;
Expand All @@ -126,6 +126,9 @@ pub trait ReaderUtil {
/// Iterate over every line until the iterator breaks or EOF.
fn each_line(&self, it: &fn(&str) -> bool);

/// Read all the lines of the file into a vector.
fn read_lines(&self) -> ~[~str];

/// Read n (between 1 and 8) little-endian unsigned integer bytes.
fn read_le_uint_n(&self, nbytes: uint) -> u64;

Expand Down Expand Up @@ -219,11 +222,14 @@ impl<T:Reader> ReaderUtil for T {
bytes
}

fn read_until(&self, c: char) -> ~str {
fn read_until(&self, c: char, include: bool) -> ~str {
let mut bytes = ~[];
loop {
let ch = self.read_byte();
if ch == -1 || ch == c as int {
if include && ch == c as int {
bytes.push(ch as u8);
}
break;
}
bytes.push(ch as u8);
Expand All @@ -232,7 +238,7 @@ impl<T:Reader> ReaderUtil for T {
}

fn read_line(&self) -> ~str {
self.read_until('\n')
self.read_until('\n', false)
}

fn read_chars(&self, n: uint) -> ~[char] {
Expand Down Expand Up @@ -306,7 +312,7 @@ impl<T:Reader> ReaderUtil for T {
}

fn read_c_str(&self) -> ~str {
self.read_until(0 as char)
self.read_until(0 as char, false)
}

fn read_whole_stream(&self) -> ~[u8] {
Expand All @@ -329,7 +335,29 @@ impl<T:Reader> ReaderUtil for T {

fn each_line(&self, it: &fn(s: &str) -> bool) {
while !self.eof() {
if !it(self.read_line()) { break; }
// include the \n, so that we can distinguish an entirely empty
// line read after "...\n", and the trailing empty line in
// "...\n\n".
let mut line = self.read_until('\n', true);

// blank line at the end of the reader is ignored
if self.eof() && line.is_empty() { break; }

// trim the \n, so that each_line is consistent with read_line
let n = str::len(line);
if line[n-1] == '\n' as u8 {
unsafe { str::raw::set_len(&mut line, n-1); }
}

if !it(line) { break; }
}
}

/// Collects every line produced by `each_line` into a vector of owned
/// strings, in the order they were read.
fn read_lines(&self) -> ~[~str] {
// `vec::build` supplies a `push` callback; whatever is pushed becomes
// the resulting vector.
do vec::build |push| {
for self.each_line |line| {
// `each_line` only lends out a `&str`, so copy it into an owned
// `~str` before storing it.
push(str::from_slice(line));
}
}
}

Expand Down Expand Up @@ -1335,6 +1363,21 @@ mod tests {
}
}

// Pins the line-splitting behaviour of `read_lines` on a string reader.
#[test]
fn test_read_lines() {
// A final '\n' terminates the last line; it does not add an empty one.
do io::with_str_reader(~"a\nb\nc\n") |inp| {
fail_unless!(inp.read_lines() == ~[~"a", ~"b", ~"c"]);
}

// A last line without a terminating '\n' is still returned.
do io::with_str_reader(~"a\nb\nc") |inp| {
fail_unless!(inp.read_lines() == ~[~"a", ~"b", ~"c"]);
}

// Empty input yields no lines at all (not a single empty line).
do io::with_str_reader(~"") |inp| {
fail_unless!(inp.read_lines().is_empty());
}
}

#[test]
fn test_readchars_wide() {
let wide_test = ~"生锈的汤匙切肉汤hello生锈的汤匙切肉汤";
Expand Down
118 changes: 92 additions & 26 deletions src/libcore/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,28 +437,37 @@ pub pure fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
unsafe { raw::slice_bytes(s, begin, end) }
}

/// Splits a string into substrings at each occurrence of a given character
/// Splits a string into substrings at each occurrence of a given
/// character.
pub pure fn split_char(s: &str, sep: char) -> ~[~str] {
split_char_inner(s, sep, len(s), true)
split_char_inner(s, sep, len(s), true, true)
}

/**
* Splits a string into substrings at each occurrence of a given
* character up to 'count' times
* character up to 'count' times.
*
* The byte must be a valid UTF-8/ASCII byte
*/
pub pure fn splitn_char(s: &str, sep: char, count: uint) -> ~[~str] {
split_char_inner(s, sep, count, true)
split_char_inner(s, sep, count, true, true)
}

/// Like `split_char`, but omits empty strings from the returned vector
pub pure fn split_char_nonempty(s: &str, sep: char) -> ~[~str] {
split_char_inner(s, sep, len(s), false)
split_char_inner(s, sep, len(s), false, false)
}

pure fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool)
-> ~[~str] {
/**
* Like `split_char`, but a trailing empty string is omitted
* (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`).
*
* Only the empty piece after a final separator is dropped; leading and
* interior empty pieces are still returned, and an empty input yields an
* empty vector.
*/
pub pure fn split_char_no_trailing(s: &str, sep: char) -> ~[~str] {
// count = len(s): the number of separators can never exceed it, so no
// split is cut short. Flags: allow_empty = true,
// allow_trailing_empty = false.
split_char_inner(s, sep, len(s), true, false)
}

pure fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
allow_trailing_empty: bool) -> ~[~str] {
if sep < 128u as char {
let b = sep as u8, l = len(s);
let mut result = ~[], done = 0u;
Expand All @@ -475,19 +484,20 @@ pure fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool)
}
i += 1u;
}
if allow_empty || start < l {
// push the trailing substring, unless it is empty and trailing empties are not allowed
if allow_trailing_empty || start < l {
unsafe { result.push(raw::slice_bytes_unique(s, start, l) ) };
}
result
} else {
splitn(s, |cur| cur == sep, count)
split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty)
}
}


/// Splits a string into substrings using a character function
pub pure fn split(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] {
split_inner(s, sepfn, len(s), true)
split_inner(s, sepfn, len(s), true, true)
}

/**
Expand All @@ -498,16 +508,25 @@ pub pure fn splitn(s: &str,
sepfn: &fn(char) -> bool,
count: uint)
-> ~[~str] {
split_inner(s, sepfn, count, true)
split_inner(s, sepfn, count, true, true)
}

/// Like `split`, but omits empty strings from the returned vector
pub pure fn split_nonempty(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] {
split_inner(s, sepfn, len(s), false)
split_inner(s, sepfn, len(s), false, false)
}


/**
* Like `split`, but a trailing empty string is omitted
* (e.g. `split_no_trailing("A B ", |c| c == ' ') == ~[~"A",~"B"]`).
*
* `sepfn` is called on each character and returns true for separators.
* Only the empty piece after a final separator is dropped; leading and
* interior empty pieces are still returned.
*/
pub pure fn split_no_trailing(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] {
// count = len(s): effectively unlimited splits. Flags:
// allow_empty = true, allow_trailing_empty = false.
split_inner(s, sepfn, len(s), true, false)
}

pure fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
allow_empty: bool) -> ~[~str] {
allow_empty: bool, allow_trailing_empty: bool) -> ~[~str] {
let l = len(s);
let mut result = ~[], i = 0u, start = 0u, done = 0u;
while i < l && done < count {
Expand All @@ -523,7 +542,7 @@ pure fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
}
i = next;
}
if allow_empty || start < l {
if allow_trailing_empty || start < l {
unsafe {
result.push(raw::slice_bytes_unique(s, start, l));
}
Expand Down Expand Up @@ -630,9 +649,11 @@ pub fn levdistance(s: &str, t: &str) -> uint {
}

/**
* Splits a string into a vector of the substrings separated by LF ('\n')
* Splits a string into a vector of the substrings separated by LF ('\n').
*/
pub pure fn lines(s: &str) -> ~[~str] { split_char(s, '\n') }
pub pure fn lines(s: &str) -> ~[~str] {
split_char_no_trailing(s, '\n')
}

/**
* Splits a string into a vector of the substrings separated by LF ('\n')
Expand All @@ -651,7 +672,7 @@ pub pure fn lines_any(s: &str) -> ~[~str] {

/// Splits a string into a vector of the substrings separated by whitespace
pub pure fn words(s: &str) -> ~[~str] {
split_nonempty(s, |c| char::is_whitespace(c))
split_nonempty(s, char::is_whitespace)
}

/** Split a string into a vector of substrings,
Expand Down Expand Up @@ -2669,6 +2690,35 @@ mod tests {

}

// Checks that `split_char_no_trailing` drops only the trailing empty piece.
#[test]
fn test_split_char_no_trailing() {
// Helper: split `s` on `c` and compare the pieces pairwise with `u`.
// NOTE(review): confirm `vec::all2` also rejects a length mismatch; the
// direct `==` checks below pin the whole vector either way.
fn t(s: &str, c: char, u: &[~str]) {
debug!(~"split_byte: " + s);
let v = split_char_no_trailing(s, c);
debug!("split_byte to: %?", v);
fail_unless!(vec::all2(v, u, |a,b| a == b));
}
t(~"abc.hello.there", '.', ~[~"abc", ~"hello", ~"there"]);
// Leading empty pieces are kept...
t(~".hello.there", '.', ~[~"", ~"hello", ~"there"]);
// ...while the empty piece after the final separator is dropped.
t(~"...hello.there.", '.', ~[~"", ~"", ~"", ~"hello", ~"there"]);

fail_unless!(~[~"", ~"", ~"", ~"hello", ~"there"]
== split_char_no_trailing(~"...hello.there.", '.'));

// Edge cases: empty input, input that is only the separator, and input
// that does not contain the separator.
fail_unless!(~[] == split_char_no_trailing(~"", 'z'));
fail_unless!(~[~""] == split_char_no_trailing(~"z", 'z'));
fail_unless!(~[~"ok"] == split_char_no_trailing(~"ok", 'z'));
}

// Exercises `split_char_no_trailing` on multi-byte UTF-8 text.
#[test]
fn test_split_char_no_trailing_2() {
let data = ~"ประเทศไทย中华Việt Nam";
// ASCII separator ('V' < 128) inside non-ASCII text.
fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"]
== split_char_no_trailing(data, 'V'));
// Non-ASCII separator, which `split_char_inner` routes to the
// char-predicate splitter instead of the byte scan.
fail_unless!(~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]
== split_char_no_trailing(data, 'ท'));
}

#[test]
fn test_split_str() {
fn t(s: &str, sep: &'a str, i: int, k: &str) {
Expand Down Expand Up @@ -2722,28 +2772,45 @@ mod tests {
fail_unless!(~[~"ok"] == split(~"ok", |cc| cc == 'z'));
}

// Checks `split_no_trailing` with closure and named-function predicates.
#[test]
fn test_split_no_trailing() {
let data = ~"ประเทศไทย中华Việt Nam";
// Multi-byte separator in the middle of the string.
fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"]
== split_no_trailing (data, |cc| cc == '华'));

// Input ends with a separator: leading empties kept, trailing one dropped.
fail_unless!(~[~"", ~"", ~"XXX", ~"YYY"]
== split_no_trailing(~"zzXXXzYYYz", char::is_lowercase));

// Input does not end with a separator: the final "z" piece is kept.
fail_unless!(~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]
== split_no_trailing(~"zzXXXzYYYz", char::is_uppercase));

// Edge cases: only the separator, empty input, and no separator present.
fail_unless!(~[~""] == split_no_trailing(~"z", |cc| cc == 'z'));
fail_unless!(~[] == split_no_trailing(~"", |cc| cc == 'z'));
fail_unless!(~[~"ok"] == split_no_trailing(~"ok", |cc| cc == 'z'));
}

#[test]
fn test_lines() {
let lf = ~"\nMary had a little lamb\nLittle lamb\n";
let crlf = ~"\r\nMary had a little lamb\r\nLittle lamb\r\n";

fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb", ~""]
fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"]
== lines(lf));

fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb", ~""]
fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"]
== lines_any(lf));

fail_unless!(~[~"\r", ~"Mary had a little lamb\r",
~"Little lamb\r", ~""]
~"Little lamb\r"]
== lines(crlf));

fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb", ~""]
fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"]
== lines_any(crlf));

fail_unless!(~[~""] == lines (~""));
fail_unless!(~[~""] == lines_any(~""));
fail_unless!(~[~"",~""] == lines (~"\n"));
fail_unless!(~[~"",~""] == lines_any(~"\n"));
fail_unless!(~[] == lines (~""));
fail_unless!(~[] == lines_any(~""));
fail_unless!(~[~""] == lines (~"\n"));
fail_unless!(~[~""] == lines_any(~"\n"));
fail_unless!(~[~"banana"] == lines (~"banana"));
fail_unless!(~[~"banana"] == lines_any(~"banana"));
}
Expand Down Expand Up @@ -3359,7 +3426,6 @@ mod tests {
0 => fail_unless!("" == x),
1 => fail_unless!("Mary had a little lamb" == x),
2 => fail_unless!("Little lamb" == x),
3 => fail_unless!("" == x),
_ => ()
}
ii += 1;
Expand Down