From 1deebeef7d18612565029da7d5adb5d71f280d85 Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Mon, 27 May 2013 12:08:37 +0200
Subject: [PATCH 1/4] Fix #3961 : use char range methods instead of byte
 offsets to detect whitespace.

---
 src/libsyntax/parse/comments.rs        |  37 +++++----
 src/test/pretty/block-comment-wchar.rs | 109 +++++++++++++++++++++++++
 2 files changed, 132 insertions(+), 14 deletions(-)
 create mode 100644 src/test/pretty/block-comment-wchar.rs
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index 5c56ea6c446c0..29fac8f951d61 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -198,26 +198,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
     }
 }
 
-// FIXME #3961: This is not the right way to convert string byte
-// offsets to characters.
-fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
-    let mut i: uint = begin;
-    while i != end {
-        if !is_whitespace(s[i] as char) { return false; } i += 1u;
+// Returns None if the first col chars of s contain a non-whitespace char.
+// Otherwise returns Some(k) where k is the byte offset of the first char
+// after that leading whitespace.  Note k may equal s.len() (one past the end).
+fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
+    let len = s.len();
+    let mut col = col.to_uint();
+    let mut cursor: uint = 0;
+    while col > 0 && cursor < len {
+        let r: str::CharRange = str::char_range_at(s, cursor);
+        if !r.ch.is_whitespace() {
+            return None;
+        }
+        cursor = r.next;
+        col -= 1;
     }
-    return true;
+    return Some(cursor);
 }
 
 fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
                                         s: ~str, col: CharPos) {
     let len = s.len();
-    // FIXME #3961: Doing bytewise comparison and slicing with CharPos
-    let col = col.to_uint();
-    let s1 = if all_whitespace(s, 0, uint::min(len, col)) {
-        if col < len {
-            s.slice(col, len).to_owned()
-        } else {  ~"" }
-    } else { s };
+    let s1 = match all_whitespace(s, col) {
+        Some(col) => {
+            if col < len {
+                s.slice(col, len).to_owned()
+            } else {  ~"" }
+        }
+        None => s,
+    };
     debug!("pushing line: %s", s1);
     lines.push(s1);
 }
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
new file mode 100644
index 0000000000000..cd97e4174f305
--- /dev/null
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -0,0 +1,109 @@
+// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is meant as a test case for Issue 3961.
+//
+// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
+
+fn f() {
+    fn nested() {
+        /*
+  Spaced2
+        */
+        /*
+          Spaced10
+        */
+        /*
+								  Tabbed8+2
+        */
+        /*
+  CR8+2
+        */
+    }
+    /*
+  Spaced2:                       (prefixed so start of space aligns with comment)
+    */
+    /*
+		Tabbed2: (more indented b/c *start* of space will align with comment)
+    */
+    /*
+      Spaced6:                       (Alignment removed and realigning spaces inserted)
+    */
+    /*
+				  Tabbed4+2:                     (Alignment removed and realigning spaces inserted)
+    */
+
+    /*
+  VT4+2:                         (should align)
+    */
+    /*
+  FF4+2:                         (should align)
+    */
+    /*
+  CR4+2:                         (should align)
+    */
+    /*
+    // (NEL deliberately omitted)
+    */
+    /*
+      Ogham Space Mark 4+2:          (should align)
+    */
+    /*
+᠎᠎᠎᠎  Mongolian Vowel Separator 4+2: (should align)
+    */
+    /*
+      Four-per-em space 4+2:         (should align)
+    */
+
+    /*
+   ᠎  Mongolian Vowel Sep   count 1: (should align)
+  ᠎   Mongolian Vowel Sep   count 2: (should align)
+  ᠎᠎  Mongolian Vowel Sep   count 3: (should align)
+ ᠎    Mongolian Vowel Sep   count 4: (should align)
+ ᠎ ᠎  Mongolian Vowel Sep   count 5: (should align)
+ ᠎᠎   Mongolian Vowel Sep   count 6: (should align)
+ ᠎᠎᠎  Mongolian Vowel Sep   count 7: (should align)
+᠎     Mongolian Vowel Sep   count 8: (should align)
+᠎  ᠎  Mongolian Vowel Sep   count 9: (should align)
+᠎ ᠎   Mongolian Vowel Sep   count A: (should align)
+᠎ ᠎᠎  Mongolian Vowel Sep   count B: (should align)
+᠎᠎    Mongolian Vowel Sep   count C: (should align)
+᠎᠎ ᠎  Mongolian Vowel Sep   count D: (should align)
+᠎᠎᠎   Mongolian Vowel Sep   count E: (should align)
+᠎᠎᠎᠎  Mongolian Vowel Sep   count F: (should align)
+    */
+
+/* */ /*
+        Hello from offset 6
+        Space 6+2:                     compare A
+᠎᠎᠎᠎᠎᠎  Mongolian Vowel Separator 6+2: compare B
+      */
+/*᠎*/ /*
+        Hello from another offset 6 with wchars establishing column offset
+        Space 6+2:                     compare C
+᠎᠎᠎᠎᠎᠎  Mongolian Vowel Separator 6+2: compare D
+      */
+}
+
+fn main() {
+    // Taken from http://en.wikipedia.org/wiki/Whitespace_character
+    let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20',
+                 // '\x85', // for some reason Rust thinks NEL isn't whitespace
+                 '\xA0', '\u1680', '\u180E',
+                 '\u2000', '\u2001', '\u2002', '\u2003',
+                 '\u2004', '\u2005', '\u2006', '\u2007',
+                 '\u2008', '\u2009', '\u200A',
+                 '\u2028', '\u2029', '\u202F', '\u205F',
+                 '\u3000'
+                ];
+    for vec::each(chars) |c| {
+        io::println(fmt!("%? %?", c, c.is_whitespace()));
+    }
+}

From 0f4d5c2134fb8ecbac02678eaf512ec13914a75d Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Tue, 28 May 2013 19:17:17 +0200
Subject: [PATCH 2/4] checkpoint block-comment-wchar state.

---
 src/test/pretty/block-comment-wchar.pp | 110 +++++++++++++++++++++++++
 src/test/pretty/block-comment-wchar.rs |   5 +-
 2 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 src/test/pretty/block-comment-wchar.pp

diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp
new file mode 100644
index 0000000000000..b591b83ee37e2
--- /dev/null
+++ b/src/test/pretty/block-comment-wchar.pp
@@ -0,0 +1,110 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is meant as a test case for Issue 3961.
+//
+// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
+//
+// pp-exact:block-comment-wchar.pp
+fn f() {
+    fn nested() {
+        /*
+          Spaced2
+        */
+        /*
+          Spaced10
+        */
+        /*
+          Tabbed8+2
+        */
+        /*
+          CR8+2
+        */
+    }
+    /*
+      Spaced2:                       (prefixed so start of space aligns with comment)
+    */
+    /*
+    		Tabbed2: (more indented b/c *start* of space will align with comment)
+    */
+    /*
+      Spaced6:                       (Alignment removed and realigning spaces inserted)
+    */
+    /*
+      Tabbed4+2:                     (Alignment removed and realigning spaces inserted)
+    */
+
+    /*
+      VT4+2:                         (should align)
+    */
+    /*
+      FF4+2:                         (should align)
+    */
+    /*
+      CR4+2:                         (should align)
+    */
+    /*
+    // (NEL deliberately omitted)
+    */
+    /*
+      Ogham Space Mark 4+2:          (should align)
+    */
+    /*
+      Mongolian Vowel Separator 4+2: (should align)
+    */
+    /*
+      Four-per-em space 4+2:         (should align)
+    */
+
+    /*
+      Mongolian Vowel Sep   count 1: (should align)
+      Mongolian Vowel Sep   count 2: (should align)
+      Mongolian Vowel Sep   count 3: (should align)
+      Mongolian Vowel Sep   count 4: (should align)
+      Mongolian Vowel Sep   count 5: (should align)
+      Mongolian Vowel Sep   count 6: (should align)
+      Mongolian Vowel Sep   count 7: (should align)
+      Mongolian Vowel Sep   count 8: (should align)
+      Mongolian Vowel Sep   count 9: (should align)
+      Mongolian Vowel Sep   count A: (should align)
+      Mongolian Vowel Sep   count B: (should align)
+      Mongolian Vowel Sep   count C: (should align)
+      Mongolian Vowel Sep   count D: (should align)
+      Mongolian Vowel Sep   count E: (should align)
+      Mongolian Vowel Sep   count F: (should align)
+    */
+
+/* */ /*
+        Hello from offset 6
+        Space 6+2:                     compare A
+        Mongolian Vowel Separator 6+2: compare B
+      */
+/*᠎*/ /*
+        Hello from another offset 6 with wchars establishing column offset
+        Space 6+2:                     compare C
+        Mongolian Vowel Separator 6+2: compare D
+      */
+}
+
+fn main() {
+    // Taken from http://en.wikipedia.org/wiki/Whitespace_character
+    let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20',
+                 // '\x85', // for some reason Rust thinks NEL isn't whitespace
+                 '\xA0', '\u1680', '\u180E',
+                 '\u2000', '\u2001', '\u2002', '\u2003',
+                 '\u2004', '\u2005', '\u2006', '\u2007',
+                 '\u2008', '\u2009', '\u200A',
+                 '\u2028', '\u2029', '\u202F', '\u205F',
+                 '\u3000'
+                ];
+    for vec::each(chars) |c| {
+        io::println(fmt!("%? %?", c, c.is_whitespace()));
+    }
+}
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
index cd97e4174f305..421857850b50c 100644
--- a/src/test/pretty/block-comment-wchar.rs
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -1,4 +1,4 @@
-// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -11,7 +11,8 @@
 // This is meant as a test case for Issue 3961.
 //
 // Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
-
+//
+// pp-exact:block-comment-wchar.pp
 fn f() {
     fn nested() {
         /*

From b03a0b27aa6fd0d2b8be4434c73ad45311e292fc Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Tue, 28 May 2013 20:41:35 +0200
Subject: [PATCH 3/4] make pp file conform to actual output, noting some
 oddities along the way.

---
 src/test/pretty/block-comment-wchar.pp | 46 +++++++++++++++-----------
 src/test/pretty/block-comment-wchar.rs | 19 +++++------
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp
index b591b83ee37e2..9c80057ccef02 100644
--- a/src/test/pretty/block-comment-wchar.pp
+++ b/src/test/pretty/block-comment-wchar.pp
@@ -81,30 +81,36 @@
       Mongolian Vowel Sep   count F: (should align)
     */
 
-/* */ /*
-        Hello from offset 6
-        Space 6+2:                     compare A
-        Mongolian Vowel Separator 6+2: compare B
-      */
-/*᠎*/ /*
-        Hello from another offset 6 with wchars establishing column offset
-        Space 6+2:                     compare C
-        Mongolian Vowel Separator 6+2: compare D
-      */
+
+
+    /* */
+
+    /*
+      Hello from offset 6
+      Space 6+2:                     compare A
+      Mongolian Vowel Separator 6+2: compare B
+    */
+
+    /*᠎*/
+
+    /*
+      Hello from another offset 6 with wchars establishing column offset
+      Space 6+2:                     compare C
+      Mongolian Vowel Separator 6+2: compare D
+    */
 }
 
 fn main() {
     // Taken from http://en.wikipedia.org/wiki/Whitespace_character
-    let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20',
-                 // '\x85', // for some reason Rust thinks NEL isn't whitespace
-                 '\xA0', '\u1680', '\u180E',
-                 '\u2000', '\u2001', '\u2002', '\u2003',
-                 '\u2004', '\u2005', '\u2006', '\u2007',
-                 '\u2008', '\u2009', '\u200A',
-                 '\u2028', '\u2029', '\u202F', '\u205F',
-                 '\u3000'
-                ];
+    let chars =
+        ['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
+         // '\x85', // for some reason Rust thinks NEL isn't whitespace
+         '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
+         '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
+         '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
+     // <= bugs in pretty-printer?
     for vec::each(chars) |c| {
-        io::println(fmt!("%? %?", c, c.is_whitespace()));
+        let ws = c.is_whitespace();
+        io::println(fmt!("%? %?" , c , ws));
     }
 }
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
index 421857850b50c..a56fa91f39f09 100644
--- a/src/test/pretty/block-comment-wchar.rs
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -81,6 +81,7 @@ fn f() {
 ᠎᠎᠎᠎  Mongolian Vowel Sep   count F: (should align)
     */
 
+
 /* */ /*
         Hello from offset 6
         Space 6+2:                     compare A
@@ -95,16 +96,14 @@ fn f() {
 
 fn main() {
     // Taken from http://en.wikipedia.org/wiki/Whitespace_character
-    let chars = [ '\x0A', '\x0B', '\x0C', '\x0D', '\x20',
-                 // '\x85', // for some reason Rust thinks NEL isn't whitespace
-                 '\xA0', '\u1680', '\u180E',
-                 '\u2000', '\u2001', '\u2002', '\u2003',
-                 '\u2004', '\u2005', '\u2006', '\u2007',
-                 '\u2008', '\u2009', '\u200A',
-                 '\u2028', '\u2029', '\u202F', '\u205F',
-                 '\u3000'
-                ];
+    let chars =
+        ['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
+         // '\x85', // for some reason Rust thinks NEL isn't whitespace
+         '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
+         '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
+         '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
     for vec::each(chars) |c| {
-        io::println(fmt!("%? %?", c, c.is_whitespace()));
+        let ws = c.is_whitespace();
+        io::println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer?
     }
 }

From 876f6deb4af73d3a6a9845c8ca0a9edff0e25989 Mon Sep 17 00:00:00 2001
From: "Felix S. Klock II" <pnkfelix@pnkfx.org>
Date: Fri, 14 Jun 2013 09:36:03 +0200
Subject: [PATCH 4/4] fixed code to placate new restrictions on form of
 function/method invocations.

---
 src/libsyntax/parse/comments.rs        | 2 +-
 src/test/pretty/block-comment-wchar.pp | 4 ++--
 src/test/pretty/block-comment-wchar.rs | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index 29fac8f951d61..572b657d6a122 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -206,7 +206,7 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
     let mut col = col.to_uint();
     let mut cursor: uint = 0;
     while col > 0 && cursor < len {
-        let r: str::CharRange = str::char_range_at(s, cursor);
+        let r: str::CharRange = s.char_range_at(cursor);
         if !r.ch.is_whitespace() {
             return None;
         }
diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp
index 9c80057ccef02..911de166e8fc7 100644
--- a/src/test/pretty/block-comment-wchar.pp
+++ b/src/test/pretty/block-comment-wchar.pp
@@ -109,8 +109,8 @@
          '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
          '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
      // <= bugs in pretty-printer?
-    for vec::each(chars) |c| {
+    for chars.each |c| {
         let ws = c.is_whitespace();
-        io::println(fmt!("%? %?" , c , ws));
+        println(fmt!("%? %?" , c , ws));
     }
 }
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
index a56fa91f39f09..d8a820542a721 100644
--- a/src/test/pretty/block-comment-wchar.rs
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -102,8 +102,8 @@ fn main() {
          '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
          '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
          '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
-    for vec::each(chars) |c| {
+    for chars.each |c| {
         let ws = c.is_whitespace();
-        io::println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer?
+        println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer?
     }
 }