Don't consider newlines to be words in the line parser
Treating newlines as words causes us to match up unrelated lines, and
doesn't make sense for a line parser.

Improves #90.
Wilfred committed Jan 23, 2022
1 parent e060ab3 commit de89caa
Showing 3 changed files with 45 additions and 22 deletions.
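To illustrate the problem this commit fixes, here is a minimal sketch (not part of the commit) of word-level diffing with the `diff` crate, which the line parser also uses. The `split_words` helper below is a simplified, hypothetical stand-in for the real tokenizer in src/line_parser.rs: it only splits out newlines, which is enough to show why "\n" tokens from unrelated lines get matched as unchanged.

```rust
// Minimal sketch, not from the commit. Requires the `diff` crate.
// `split_words` is a simplified stand-in for the tokenizer in
// src/line_parser.rs: it only separates newlines into their own tokens.
fn split_words(s: &str) -> Vec<&str> {
    let mut words = vec![];
    let mut start = 0;
    for (i, c) in s.char_indices() {
        if c == '\n' {
            if start < i {
                words.push(&s[start..i]);
            }
            words.push(&s[i..i + 1]);
            start = i + 1;
        }
    }
    if start < s.len() {
        words.push(&s[start..]);
    }
    words
}

fn main() {
    // "foo" and "bar" have nothing in common, but both lines end with "\n".
    for res in diff::slice(&split_words("foo\n"), &split_words("bar\n")) {
        match res {
            // The two "\n" tokens compare equal, so they land in Both. Before
            // this commit the parser recorded that as an unchanged position,
            // tying together two otherwise unrelated lines. Skipping tokens
            // equal to "\n", as the patch below does, avoids the spurious match.
            diff::Result::Both(word, _) => println!("unchanged: {:?}", word),
            diff::Result::Left(word) => println!("only lhs:  {:?}", word),
            diff::Result::Right(word) => println!("only rhs:  {:?}", word),
        }
    }
}
```

Running this shows "foo" only on the left, "bar" only on the right, and the two "\n" tokens reported as unchanged; it is that last, contentless match that the new `if lhs_word != "\n"` checks in the patch filter out.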
CHANGELOG.md: 3 additions & 0 deletions
@@ -33,6 +33,9 @@ Improved diffing performance (both time and memory usage).
Sliders are now fixed up after diffing. This produces better looking
results in more cases, and makes the primary diffing faster.

+ Fixed some corner cases in the line parser where it would match up
+ isolated newline characters as unchanged, leading to weird alignment.

## 0.15 (released 6 January 2022)

### Parsing
src/hunks.rs: 1 addition & 1 deletion
@@ -663,7 +663,7 @@ pub fn aligned_lines_from_hunk(
max_lhs_src_line: LineNumber,
max_rhs_src_line: LineNumber,
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
- let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = hunk.lines.clone();
+ let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = dbg!(hunk.lines.clone());

// TODO: this largely duplicates add_context().
let before_context = calculate_before_context(&hunk_lines, opposite_to_lhs, opposite_to_rhs);
src/line_parser.rs: 41 additions & 21 deletions
@@ -130,31 +130,35 @@ pub fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> {
for diff_res in diff::slice(&split_words(&lhs_part), &split_words(&rhs_part)) {
match diff_res {
diff::Result::Left(lhs_word) => {
- let lhs_pos =
- lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
- res.push(MatchedPos {
- // TODO: rename this kind to reflect
- // that it's used for both code
- // comments and plain text.
- kind: MatchKind::ChangedCommentPart {},
- pos: lhs_pos[0],
- });
+ if lhs_word != "\n" {
+ let lhs_pos =
+ lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
+ res.push(MatchedPos {
+ // TODO: rename this kind to reflect
+ // that it's used for both code
+ // comments and plain text.
+ kind: MatchKind::ChangedCommentPart {},
+ pos: lhs_pos[0],
+ });
+ }

lhs_offset += lhs_word.len();
}
diff::Result::Both(lhs_word, rhs_word) => {
- let lhs_pos =
- lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
- let rhs_pos =
- rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
-
- res.push(MatchedPos {
- kind: MatchKind::UnchangedCommentPart {
- self_pos: lhs_pos[0],
- opposite_pos: rhs_pos,
- },
- pos: lhs_pos[0],
- });
+ if lhs_word != "\n" {
+ let lhs_pos =
+ lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
+ let rhs_pos =
+ rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
+
+ res.push(MatchedPos {
+ kind: MatchKind::UnchangedCommentPart {
+ self_pos: lhs_pos[0],
+ opposite_pos: rhs_pos,
+ },
+ pos: lhs_pos[0],
+ });
+ }

lhs_offset += lhs_word.len();
rhs_offset += rhs_word.len();
@@ -191,6 +195,22 @@ mod tests {
assert!(!positions[0].kind.is_change());
}

+ #[test]
+ fn test_no_changes_trailing_newlines() {
+ let positions = change_positions("foo\n", "foo\n");
+
+ assert_eq!(positions.len(), 1);
+ assert!(!positions[0].kind.is_change());
+ }
+
+ #[test]
+ fn test_novel_lhs_trailing_newlines() {
+ let positions = change_positions("foo\n", "");
+
+ assert_eq!(positions.len(), 1);
+ assert!(positions[0].kind.is_change());
+ }

#[test]
fn test_positions_novel_lhs() {
let positions = change_positions("foo", "");
