mun-lang · baszalmstra · Feb 28, 2021 · Jan 15, 2021 · Feb 26, 2021 · Wodann
diff --git a/crates/mun_codegen/src/test.rs b/crates/mun_codegen/src/test.rs
@@ -969,7 +969,7 @@ fn test_snapshot_with_optimization(text: &str, opt: OptimizationLevel) {
         messages.borrow_mut().push(format!(
             "error {}:{}: {}",
             line_col.line + 1,
-            line_col.col + 1,
+            line_col.col_utf16 + 1,
             diag.message()
         ));
     });

diff --git a/crates/mun_compiler/src/diagnostics_snippets.rs b/crates/mun_compiler/src/diagnostics_snippets.rs
@@ -36,8 +36,8 @@ pub(crate) fn emit_syntax_error(
             origin: Some(relative_file_path),
             annotations: vec![SourceAnnotation {
                 range: (
-                    location.offset().to_usize() - line_offset,
-                    location.end_offset().to_usize() - line_offset + 1,
+                    usize::from(location.offset()) - line_offset,
+                    usize::from(location.end_offset()) - line_offset + 1,
                 ),
                 label: &syntax_error_text,
                 annotation_type: AnnotationType::Error,
@@ -167,8 +167,8 @@ fn emit_diagnostic(
                         .iter()
                         .map(|annotation| SourceAnnotation {
                             range: (
-                                annotation.range.start().to_usize() - line_offset,
-                                annotation.range.end().to_usize() - line_offset,
+                                usize::from(annotation.range.start()) - line_offset,
+                                usize::from(annotation.range.end()) - line_offset,
                             ),
                             label: annotation.message.as_str(),
                             annotation_type: AnnotationType::Error,

diff --git a/...alidator/snapshots/mun_hir__expr__validator__tests__free_type_alias_without_type_ref.snap b/...alidator/snapshots/mun_hir__expr__validator__tests__free_type_alias_without_type_ref.snap
@@ -2,5 +2,5 @@
 source: crates/mun_hir/src/expr/validator/tests.rs
 expression: "type Foo; // `Foo` must have a target type"
 ---
-[0; 9): free type alias without type ref
+0..9: free type alias without type ref
 
diff --git a/...r/src/expr/validator/snapshots/mun_hir__expr__validator__tests__uninitialized_access.snap b/...r/src/expr/validator/snapshots/mun_hir__expr__validator__tests__uninitialized_access.snap
@@ -2,5 +2,5 @@
 source: crates/mun_hir/src/expr/validator/tests.rs
 expression: "fn foo() {\n    let a:int;\n    let b = a + 3;\n}"
 ---
-[38; 39): use of possibly-uninitialized variable
+38..39: use of possibly-uninitialized variable
 
diff --git a/...rc/expr/validator/snapshots/mun_hir__expr__validator__tests__uninitialized_access_if.snap b/...rc/expr/validator/snapshots/mun_hir__expr__validator__tests__uninitialized_access_if.snap
@@ -2,5 +2,5 @@
 source: crates/mun_hir/src/expr/validator/tests.rs
 expression: "fn foo() {\n    let a:int;\n    if true { a = 3; } else { a = 4; }\n    let b = a + 4;  // correct, `a` is initialized either way\n}\n\nfn bar() {\n    let a:int;\n    if true { a = 3; }\n    let b = a + 4;  // `a` is possibly-unitialized\n}\n\nfn baz() {\n    let a:int;\n    if true { return } else { a = 4 };\n    let b = a + 4;  // correct, `a` is initialized either way\n}\n\nfn foz() {\n    let a:int;\n    if true { a = 4 } else { return };\n    let b = a + 4;  // correct, `a` is initialized either way\n}\n\nfn boz() {\n    let a:int;\n    return;\n    let b = a + 4;  // `a` is not initialized but this is dead code anyway\n}"
 ---
-[191; 192): use of possibly-uninitialized variable
+191..192: use of possibly-uninitialized variable
 
diff --git a/...expr/validator/snapshots/mun_hir__expr__validator__tests__uninitialized_access_while.snap b/...expr/validator/snapshots/mun_hir__expr__validator__tests__uninitialized_access_while.snap
@@ -2,5 +2,5 @@
 source: crates/mun_hir/src/expr/validator/tests.rs
 expression: "fn foo(b:int) {\n    let a:int;\n    while b < 4 { b += 1; a = b; a += 1; }\n    let c = a + 4;  // `a` is possibly-unitialized\n}"
 ---
-[86; 87): use of possibly-uninitialized variable
+86..87: use of possibly-uninitialized variable
 
diff --git a/crates/mun_hir/src/expr/validator/tests.rs b/crates/mun_hir/src/expr/validator/tests.rs
@@ -84,7 +84,7 @@ fn diagnostics(content: &str) -> String {
     let mut diags = String::new();
 
     let mut diag_sink = DiagnosticSink::new(|diag| {
-        write!(diags, "{}: {}\n", diag.highlight_range(), diag.message()).unwrap();
+        write!(diags, "{:?}: {}\n", diag.highlight_range(), diag.message()).unwrap();
     });
 
     for item in Package::all(&db)

diff --git a/crates/mun_hir/src/in_file.rs b/crates/mun_hir/src/in_file.rs
@@ -7,7 +7,7 @@ use mun_syntax::SyntaxNode;
 ///
 /// * `InFile<SyntaxNode>` -- syntax node in a file
 /// * `InFile<ast::FnDef>` -- ast node in a file
-/// * `InFile<TextUnit>` -- offset in a file
+/// * `InFile<TextSize>` -- offset in a file
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
 pub struct InFile<T> {
     pub file_id: FileId,

diff --git a/crates/mun_hir/src/line_index.rs b/crates/mun_hir/src/line_index.rs
@@ -1,43 +1,128 @@
-use mun_syntax::TextUnit;
+use mun_syntax::TextSize;
 
-use superslice::Ext;
+use rustc_hash::FxHashMap;
 
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct LineIndex {
-    newlines: Vec<TextUnit>,
+    /// Offset of the start of each line, measured from the beginning of the text
+    newlines: Vec<TextSize>,
+
+    /// Non-ASCII characters found on each line, keyed by zero-based line index
+    pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct LineCol {
+    /// The line index (zero-based)
     pub line: u32,
-    pub col: u32,
+
+    /// The column index when the text is represented as UTF16 text (zero-based)
+    pub col_utf16: u32,
+}
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+pub(crate) struct Utf16Char {
+    /// Start offset of a character inside a line, zero-based
+    pub(crate) start: TextSize,
+
+    /// End offset of a character inside a line, zero-based
+    pub(crate) end: TextSize,
+}
+
+impl Utf16Char {
+    /// Returns the length in 8-bit UTF-8 code units.
+    fn len(&self) -> TextSize {
+        self.end - self.start
+    }
+
+    /// Returns the length in 16-bit UTF-16 code units.
+    fn len_utf16(&self) -> usize {
+        if self.len() == TextSize::from(4) {
+            2
+        } else {
+            1
+        }
+    }
 }
 
 impl LineIndex {
     pub fn new(text: &str) -> LineIndex {
+        let mut utf16_lines = FxHashMap::default();
+        let mut utf16_chars = Vec::new();
+
+        // Iterate over all the characters in the text and record the start offset of every
+        // line as well as every non-ASCII character.
         let mut newlines = vec![0.into()];
         let mut curr_row = 0.into();
+        let mut curr_col = 0.into();
+        let mut line = 0;
         for c in text.chars() {
-            curr_row += TextUnit::of_char(c);
+            let c_len = TextSize::of(c);
+            curr_row += c_len;
             if c == '\n' {
                 newlines.push(curr_row);
+
+                // Save any non-ASCII characters seen on the line that just ended
+                if !utf16_chars.is_empty() {
+                    utf16_lines.insert(line, utf16_chars);
+                    utf16_chars = Vec::new();
+                }
+
+                // Prepare for processing the next line
+                curr_col = 0.into();
+                line += 1;
+                continue;
             }
+
+            if !c.is_ascii() {
+                utf16_chars.push(Utf16Char {
+                    start: curr_col,
+                    end: curr_col + c_len,
+                });
+            }
+
+            curr_col += c_len;
         }
 
-        LineIndex { newlines }
+        // Save any non-ASCII characters seen on the last line
+        if !utf16_chars.is_empty() {
+            utf16_lines.insert(line, utf16_chars);
+        }
+
+        LineIndex {
+            newlines,
+            utf16_lines,
+        }
     }
 
-    pub fn line_col(&self, offset: TextUnit) -> LineCol {
-        let line = self.newlines.upper_bound(&offset) - 1;
+    /// Returns the line and column index at the given offset in the text
+    pub fn line_col(&self, offset: TextSize) -> LineCol {
+        let line = self
+            .newlines
+            .binary_search_by(|x| {
+                if x <= &offset {
+                    std::cmp::Ordering::Less
+                } else {
+                    std::cmp::Ordering::Greater
+                }
+            })
+            .unwrap_or_else(|i| i)
+            - 1;
         let line_start_offset = self.newlines[line];
         let col = offset - line_start_offset;
 
         LineCol {
             line: line as u32,
-            col: col.to_usize() as u32,
+            col_utf16: self.utf8_to_utf16_col(line as u32, col) as u32,
         }
     }
 
+    /// Returns the offset in the text for the given line and column index
+    pub fn offset(&self, line_col: LineCol) -> TextSize {
+        let col = self.utf16_to_utf8_col(line_col.line, line_col.col_utf16);
+        self.newlines[line_col.line as usize] + col
+    }
+
     /// Retrieves the text between `first_line` and `last_line`, if any.
     pub fn text_part<'a>(
         &self,
@@ -46,19 +131,53 @@ impl LineIndex {
         text: &'a str,
         text_len: usize,
     ) -> Option<&'a str> {
-        let start_of_part = self.newlines.get(first_line as usize)?.to_usize();
+        let start_of_part = (*self.newlines.get(first_line as usize)?).into();
         let end_of_part = self
             .newlines
             .get(last_line as usize + 1)
-            .map(|u| u.to_usize() - 1)
+            .map(|u| usize::from(*u) - 1usize)
             .unwrap_or(text_len);
         Some(&text[start_of_part..end_of_part])
     }
 
     /// Retrieves the offset to the line corresponding to `line_index`.
     #[inline]
     pub fn line_offset(&self, line_index: u32) -> usize {
-        self.newlines[line_index as usize].to_usize()
+        self.newlines[line_index as usize].into()
+    }
+
+    /// Converts a utf16 column on the given line to the equivalent utf8 column offset.
+    fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
+        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
+            for c in utf16_chars {
+                if col > u32::from(c.start) {
+                    col += u32::from(c.len()) - c.len_utf16() as u32;
+                } else {
+                    // From here on, all utf16 characters come *after* the character we are mapping,
+                    // so we don't need to take them into account
+                    break;
+                }
+            }
+        }
+
+        col.into()
+    }
+
+    /// Converts a utf8 column offset on the given line to the equivalent utf16 column.
+    fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
+        let mut res: usize = col.into();
+        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
+            for c in utf16_chars {
+                if c.end <= col {
+                    res -= usize::from(c.len()) - c.len_utf16();
+                } else {
+                    // From here on, all utf16 characters come *after* the character we are mapping,
+                    // so we don't need to take them into account
+                    break;
+                }
+            }
+        }
+        res
     }
 }
 #[cfg(test)]
@@ -69,11 +188,41 @@ mod tests {
     fn test_line_index() {
         let text = "hello\nworld";
         let index = LineIndex::new(text);
-        assert_eq!(index.line_col(0.into()), LineCol { line: 0, col: 0 });
-        assert_eq!(index.line_col(1.into()), LineCol { line: 0, col: 1 });
-        assert_eq!(index.line_col(5.into()), LineCol { line: 0, col: 5 });
-        assert_eq!(index.line_col(6.into()), LineCol { line: 1, col: 0 });
-        assert_eq!(index.line_col(7.into()), LineCol { line: 1, col: 1 });
+        assert_eq!(
+            index.line_col(0.into()),
+            LineCol {
+                line: 0,
+                col_utf16: 0
+            }
+        );
+        assert_eq!(
+            index.line_col(1.into()),
+            LineCol {
+                line: 0,
+                col_utf16: 1
+            }
+        );
+        assert_eq!(
+            index.line_col(5.into()),
+            LineCol {
+                line: 0,
+                col_utf16: 5
+            }
+        );
+        assert_eq!(
+            index.line_col(6.into()),
+            LineCol {
+                line: 1,
+                col_utf16: 0
+            }
+        );
+        assert_eq!(
+            index.line_col(7.into()),
+            LineCol {
+                line: 1,
+                col_utf16: 1
+            }
+        );
     }
     #[test]
     fn test_text_part() {
@@ -88,6 +237,24 @@ mod tests {
         );
         assert_eq!(index.text_part(0, 2, &text, text_len), Some(text));
     }
+    #[test]
+    fn test_text_part_utf16() {
+        let text = "a\n❤️\nb";
+        let index = LineIndex::new(text);
+        let start = index.offset(LineCol {
+            line: 1,
+            col_utf16: 0,
+        });
+        let end = index.offset(LineCol {
+            line: 1,
+            col_utf16: 1,
+        });
+        assert_eq!(
+            index.text_part(1, 1, &text, (end - start).into()),
+            Some("❤️")
+        );
+    }
+
     #[test]
     fn test_line_offset() {
         let text = "for\ntest\npurpose";

diff --git a/crates/mun_hir/src/package_defs/snapshots/mun_hir__package_defs__tests__use_cyclic.snap b/crates/mun_hir/src/package_defs/snapshots/mun_hir__package_defs__tests__use_cyclic.snap
@@ -4,11 +4,11 @@ expression: "//- /foo.mun\nuse super::baz::Cyclic;\n\npub struct Ok;\n\n//- /bar
 ---
 mod mod
 +-- mod bar
-|   +-- ERROR: [17; 23): unresolved import
+|   +-- ERROR: 17..23: unresolved import
 |   '-- use struct package::foo::Ok
 +-- mod baz
-|   +-- ERROR: [17; 23): unresolved import
+|   +-- ERROR: 17..23: unresolved import
 |   '-- use struct package::foo::Ok
 '-- mod foo
-    +-- ERROR: [4; 22): unresolved import
+    +-- ERROR: 4..22: unresolved import
     '-- struct Ok
diff --git a/.../mun_hir/src/package_defs/snapshots/mun_hir__package_defs__tests__use_duplicate_name.snap b/.../mun_hir/src/package_defs/snapshots/mun_hir__package_defs__tests__use_duplicate_name.snap
@@ -4,7 +4,7 @@ expression: "//- /foo.mun\npub struct Ok;\n\n//- /bar.mun\nuse package::foo::Ok;
 ---
 mod mod
 +-- mod bar
-|   +-- ERROR: [4; 20): a second item with the same name imported. Try to use an alias.
+|   +-- ERROR: 4..20: a second item with the same name imported. Try to use an alias.
 |   '-- struct Ok
 '-- mod foo
     '-- struct Ok
diff --git a/crates/mun_hir/src/package_defs/snapshots/mun_hir__package_defs__tests__use_unresolved.snap b/crates/mun_hir/src/package_defs/snapshots/mun_hir__package_defs__tests__use_unresolved.snap
@@ -3,8 +3,8 @@ source: crates/mun_hir/src/package_defs/tests.rs
 expression: "//- /foo.mun\npub struct Foo;\n\n//- /mod.mun\nuse foo::Foo;   // works\nuse foo::Bar;   // doesnt work (Bar does not exist)\nuse baz::Baz;   // doesnt work (baz does not exist)"
 ---
 mod mod
-+-- ERROR: [29; 37): unresolved import
-+-- ERROR: [81; 89): unresolved import
++-- ERROR: 29..37: unresolved import
++-- ERROR: 81..89: unresolved import
 +-- use struct package::foo::Foo
 '-- mod foo
     '-- struct Foo
diff --git a/crates/mun_hir/src/package_defs/tests.rs b/crates/mun_hir/src/package_defs/tests.rs
@@ -181,7 +181,7 @@ fn tree_for_module(
     // Add module level diagnostics
     let mut diag_sink = DiagnosticSink::new(|diag| {
         node.push(format!(
-            "ERROR: {}: {}",
+            "ERROR: {:?}: {}",
             diag.highlight_range(),
             diag.message()
         ));