From 1deebeef7d18612565029da7d5adb5d71f280d85 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Mon, 27 May 2013 12:08:37 +0200 Subject: [PATCH 1/4] Fix #3961 : use char range methods instead of byte offsets to detect whitespace. --- src/libsyntax/parse/comments.rs | 37 +++++---- src/test/pretty/block-comment-wchar.rs | 109 +++++++++++++++++++++++++ 2 files changed, 132 insertions(+), 14 deletions(-) create mode 100644 src/test/pretty/block-comment-wchar.rs diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 5c56ea6c446c0..29fac8f951d61 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -198,26 +198,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool, } } -// FIXME #3961: This is not the right way to convert string byte -// offsets to characters. -fn all_whitespace(s: &str, begin: uint, end: uint) -> bool { - let mut i: uint = begin; - while i != end { - if !is_whitespace(s[i] as char) { return false; } i += 1u; +// Returns None if the first col chars of s contain a non-whitespace char. +// Otherwise returns Some(k) where k is first char offset after that leading +// whitespace. Note k may be outside bounds of s. 
+fn all_whitespace(s: &str, col: CharPos) -> Option { + let len = s.len(); + let mut col = col.to_uint(); + let mut cursor: uint = 0; + while col > 0 && cursor < len { + let r: str::CharRange = str::char_range_at(s, cursor); + if !r.ch.is_whitespace() { + return None; + } + cursor = r.next; + col -= 1; } - return true; + return Some(cursor); } fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], s: ~str, col: CharPos) { let len = s.len(); - // FIXME #3961: Doing bytewise comparison and slicing with CharPos - let col = col.to_uint(); - let s1 = if all_whitespace(s, 0, uint::min(len, col)) { - if col < len { - s.slice(col, len).to_owned() - } else { ~"" } - } else { s }; + let s1 = match all_whitespace(s, col) { + Some(col) => { + if col < len { + s.slice(col, len).to_owned() + } else { ~"" } + } + None => s, + }; debug!("pushing line: %s", s1); lines.push(s1); } diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs new file mode 100644 index 0000000000000..cd97e4174f305 --- /dev/null +++ b/src/test/pretty/block-comment-wchar.rs @@ -0,0 +1,109 @@ +// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is meant as a test case for Issue 3961. 
+// +// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs + +fn f() { + fn nested() { + /* + Spaced2 + */ + /* + Spaced10 + */ + /* + Tabbed8+2 + */ + /* + CR8+2 + */ + } + /* + Spaced2: (prefixed so start of space aligns with comment) + */ + /* + Tabbed2: (more indented b/c *start* of space will align with comment) + */ + /* + Spaced6: (Alignment removed and realigning spaces inserted) + */ + /* + Tabbed4+2: (Alignment removed and realigning spaces inserted) + */ + + /* + VT4+2: (should align) + */ + /* + FF4+2: (should align) + */ + /* + CR4+2: (should align) + */ + /* + // (NEL deliberately omitted) + */ + /* +     Ogham Space Mark 4+2: (should align) + */ + /* +᠎᠎᠎᠎ Mongolian Vowel Separator 4+2: (should align) + */ + /* +     Four-per-em space 4+2: (should align) + */ + + /* + ᠎ Mongolian Vowel Sep count 1: (should align) + ᠎ Mongolian Vowel Sep count 2: (should align) + ᠎᠎ Mongolian Vowel Sep count 3: (should align) + ᠎ Mongolian Vowel Sep count 4: (should align) + ᠎ ᠎ Mongolian Vowel Sep count 5: (should align) + ᠎᠎ Mongolian Vowel Sep count 6: (should align) + ᠎᠎᠎ Mongolian Vowel Sep count 7: (should align) +᠎ Mongolian Vowel Sep count 8: (should align) +᠎ ᠎ Mongolian Vowel Sep count 9: (should align) +᠎ ᠎ Mongolian Vowel Sep count A: (should align) +᠎ ᠎᠎ Mongolian Vowel Sep count B: (should align) +᠎᠎ Mongolian Vowel Sep count C: (should align) +᠎᠎ ᠎ Mongolian Vowel Sep count D: (should align) +᠎᠎᠎ Mongolian Vowel Sep count E: (should align) +᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align) + */ + +/* */ /* + Hello from offset 6 + Space 6+2: compare A +᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare B + */ +/*᠎*/ /* + Hello from another offset 6 with wchars establishing column offset + Space 6+2: compare C +᠎᠎᠎᠎᠎᠎ Mongolian Vowel Separator 6+2: compare D + */ +} + +fn main() { + // Taken from http://en.wikipedia.org/wiki/Whitespace_character + let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20', + // '\x85', // for some reason Rust 
thinks NEL isn't whitespace + '\xA0', '\u1680', '\u180E', + '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', + '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', + '\u3000' + ]; + for vec::each(chars) |c| { + io::println(fmt!("%? %?", c, c.is_whitespace())); + } +} From 0f4d5c2134fb8ecbac02678eaf512ec13914a75d Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Tue, 28 May 2013 19:17:17 +0200 Subject: [PATCH 2/4] checkpoint block-comment-wchar state. --- src/test/pretty/block-comment-wchar.pp | 110 +++++++++++++++++++++++++ src/test/pretty/block-comment-wchar.rs | 5 +- 2 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 src/test/pretty/block-comment-wchar.pp diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp new file mode 100644 index 0000000000000..b591b83ee37e2 --- /dev/null +++ b/src/test/pretty/block-comment-wchar.pp @@ -0,0 +1,110 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// This is meant as a test case for Issue 3961. 
+// +// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs +// +// pp-exact:block-comment-wchar.pp +fn f() { + fn nested() { + /* + Spaced2 + */ + /* + Spaced10 + */ + /* + Tabbed8+2 + */ + /* + CR8+2 + */ + } + /* + Spaced2: (prefixed so start of space aligns with comment) + */ + /* + Tabbed2: (more indented b/c *start* of space will align with comment) + */ + /* + Spaced6: (Alignment removed and realigning spaces inserted) + */ + /* + Tabbed4+2: (Alignment removed and realigning spaces inserted) + */ + + /* + VT4+2: (should align) + */ + /* + FF4+2: (should align) + */ + /* + CR4+2: (should align) + */ + /* + // (NEL deliberately omitted) + */ + /* + Ogham Space Mark 4+2: (should align) + */ + /* + Mongolian Vowel Separator 4+2: (should align) + */ + /* + Four-per-em space 4+2: (should align) + */ + + /* + Mongolian Vowel Sep count 1: (should align) + Mongolian Vowel Sep count 2: (should align) + Mongolian Vowel Sep count 3: (should align) + Mongolian Vowel Sep count 4: (should align) + Mongolian Vowel Sep count 5: (should align) + Mongolian Vowel Sep count 6: (should align) + Mongolian Vowel Sep count 7: (should align) + Mongolian Vowel Sep count 8: (should align) + Mongolian Vowel Sep count 9: (should align) + Mongolian Vowel Sep count A: (should align) + Mongolian Vowel Sep count B: (should align) + Mongolian Vowel Sep count C: (should align) + Mongolian Vowel Sep count D: (should align) + Mongolian Vowel Sep count E: (should align) + Mongolian Vowel Sep count F: (should align) + */ + +/* */ /* + Hello from offset 6 + Space 6+2: compare A + Mongolian Vowel Separator 6+2: compare B + */ +/*᠎*/ /* + Hello from another offset 6 with wchars establishing column offset + Space 6+2: compare C + Mongolian Vowel Separator 6+2: compare D + */ +} + +fn main() { + // Taken from http://en.wikipedia.org/wiki/Whitespace_character + let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20', + // '\x85', // for some reason Rust thinks NEL isn't whitespace + 
'\xA0', '\u1680', '\u180E', + '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', + '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', + '\u3000' + ]; + for vec::each(chars) |c| { + io::println(fmt!("%? %?", c, c.is_whitespace())); + } +} diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs index cd97e4174f305..421857850b50c 100644 --- a/src/test/pretty/block-comment-wchar.rs +++ b/src/test/pretty/block-comment-wchar.rs @@ -1,4 +1,4 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -11,7 +11,8 @@ // This is meant as a test case for Issue 3961. // // Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs - +// +// pp-exact:block-comment-wchar.pp fn f() { fn nested() { /* From b03a0b27aa6fd0d2b8be4434c73ad45311e292fc Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Tue, 28 May 2013 20:41:35 +0200 Subject: [PATCH 3/4] make pp file conform to actual output, noting some oddities along the way. 
--- src/test/pretty/block-comment-wchar.pp | 46 +++++++++++++++----------- src/test/pretty/block-comment-wchar.rs | 19 +++++------ 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp index b591b83ee37e2..9c80057ccef02 100644 --- a/src/test/pretty/block-comment-wchar.pp +++ b/src/test/pretty/block-comment-wchar.pp @@ -81,30 +81,36 @@ Mongolian Vowel Sep count F: (should align) */ -/* */ /* - Hello from offset 6 - Space 6+2: compare A - Mongolian Vowel Separator 6+2: compare B - */ -/*᠎*/ /* - Hello from another offset 6 with wchars establishing column offset - Space 6+2: compare C - Mongolian Vowel Separator 6+2: compare D - */ + + + /* */ + + /* + Hello from offset 6 + Space 6+2: compare A + Mongolian Vowel Separator 6+2: compare B + */ + + /*᠎*/ + + /* + Hello from another offset 6 with wchars establishing column offset + Space 6+2: compare C + Mongolian Vowel Separator 6+2: compare D + */ } fn main() { // Taken from http://en.wikipedia.org/wiki/Whitespace_character - let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20', - // '\x85', // for some reason Rust thinks NEL isn't whitespace - '\xA0', '\u1680', '\u180E', - '\u2000', '\u2001', '\u2002', '\u2003', - '\u2004', '\u2005', '\u2006', '\u2007', - '\u2008', '\u2009', '\u200A', - '\u2028', '\u2029', '\u202F', '\u205F', - '\u3000' - ]; + let chars = + ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', + // '\x85', // for some reason Rust thinks NEL isn't whitespace + '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', '\u3000']; + // <= bugs in pretty-printer? for vec::each(chars) |c| { - io::println(fmt!("%? %?", c, c.is_whitespace())); + let ws = c.is_whitespace(); + io::println(fmt!("%? %?" 
, c , ws)); } } diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs index 421857850b50c..a56fa91f39f09 100644 --- a/src/test/pretty/block-comment-wchar.rs +++ b/src/test/pretty/block-comment-wchar.rs @@ -81,6 +81,7 @@ fn f() { ᠎᠎᠎᠎ Mongolian Vowel Sep count F: (should align) */ + /* */ /* Hello from offset 6 Space 6+2: compare A @@ -95,16 +96,14 @@ fn f() { fn main() { // Taken from http://en.wikipedia.org/wiki/Whitespace_character - let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20', - // '\x85', // for some reason Rust thinks NEL isn't whitespace - '\xA0', '\u1680', '\u180E', - '\u2000', '\u2001', '\u2002', '\u2003', - '\u2004', '\u2005', '\u2006', '\u2007', - '\u2008', '\u2009', '\u200A', - '\u2028', '\u2029', '\u202F', '\u205F', - '\u3000' - ]; + let chars = + ['\x0A', '\x0B', '\x0C', '\x0D', '\x20', + // '\x85', // for some reason Rust thinks NEL isn't whitespace + '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003', + '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', + '\u2028', '\u2029', '\u202F', '\u205F', '\u3000']; for vec::each(chars) |c| { - io::println(fmt!("%? %?", c, c.is_whitespace())); + let ws = c.is_whitespace(); + io::println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer? } } From 876f6deb4af73d3a6a9845c8ca0a9edff0e25989 Mon Sep 17 00:00:00 2001 From: "Felix S. Klock II" Date: Fri, 14 Jun 2013 09:36:03 +0200 Subject: [PATCH 4/4] fixed code to placate new restrictions on form of function/method invocations. 
--- src/libsyntax/parse/comments.rs | 2 +- src/test/pretty/block-comment-wchar.pp | 4 ++-- src/test/pretty/block-comment-wchar.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 29fac8f951d61..572b657d6a122 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -206,7 +206,7 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<uint> { let mut col = col.to_uint(); let mut cursor: uint = 0; while col > 0 && cursor < len { - let r: str::CharRange = str::char_range_at(s, cursor); + let r: str::CharRange = s.char_range_at(cursor); if !r.ch.is_whitespace() { return None; } diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp index 9c80057ccef02..911de166e8fc7 100644 --- a/src/test/pretty/block-comment-wchar.pp +++ b/src/test/pretty/block-comment-wchar.pp @@ -109,8 +109,8 @@ '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u2028', '\u2029', '\u202F', '\u205F', '\u3000']; // <= bugs in pretty-printer? - for vec::each(chars) |c| { + for chars.each |c| { let ws = c.is_whitespace(); - io::println(fmt!("%? %?" , c , ws)); + println(fmt!("%? %?" , c , ws)); } } diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs index a56fa91f39f09..d8a820542a721 100644 --- a/src/test/pretty/block-comment-wchar.rs +++ b/src/test/pretty/block-comment-wchar.rs @@ -102,8 +102,8 @@ fn main() { '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003', '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A', '\u2028', '\u2029', '\u202F', '\u205F', '\u3000']; - for vec::each(chars) |c| { + for chars.each |c| { let ws = c.is_whitespace(); - io::println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer? + println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer? } }