Don't consider newlines to be words in the line parser
Treating newlines as words causes us to match up unrelated lines, and
doesn't make sense for a line parser.

Improves #90.
Wilfred committed Jan 23, 2022
1 parent e060ab3 commit de89caa
Showing 3 changed files with 45 additions and 22 deletions.
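To illustrate the problem this commit fixes, here is a minimal sketch (not part of the commit) of word-level diffing with the `diff` crate, which the line parser also uses. The `split_words` helper below is a simplified, hypothetical stand-in for the real tokenizer in src/line_parser.rs: it only splits out newlines, which is enough to show why "\n" tokens from unrelated lines get matched as unchanged.

```rust
// Minimal sketch, not from the commit. Requires the `diff` crate.
// `split_words` is a simplified stand-in for the tokenizer in
// src/line_parser.rs: it only separates newlines into their own tokens.
fn split_words(s: &str) -> Vec<&str> {
    let mut words = vec![];
    let mut start = 0;
    for (i, c) in s.char_indices() {
        if c == '\n' {
            if start < i {
                words.push(&s[start..i]);
            }
            words.push(&s[i..i + 1]);
            start = i + 1;
        }
    }
    if start < s.len() {
        words.push(&s[start..]);
    }
    words
}

fn main() {
    // "foo" and "bar" have nothing in common, but both lines end with "\n".
    for res in diff::slice(&split_words("foo\n"), &split_words("bar\n")) {
        match res {
            // The two "\n" tokens compare equal, so they land in Both. Before
            // this commit the parser recorded that as an unchanged position,
            // tying together two otherwise unrelated lines. Skipping tokens
            // equal to "\n", as the patch below does, avoids the spurious match.
            diff::Result::Both(word, _) => println!("unchanged: {:?}", word),
            diff::Result::Left(word) => println!("only lhs:  {:?}", word),
            diff::Result::Right(word) => println!("only rhs:  {:?}", word),
        }
    }
}
```

Running this shows "foo" only on the left, "bar" only on the right, and the two "\n" tokens reported as unchanged; it is that last, contentless match that the new `if lhs_word != "\n"` checks in the patch filter out.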
CHANGELOG.md: 3 additions & 0 deletions
@@ -33,6 +33,9 @@ Improved diffing performance (both time and memory usage).
Sliders are now fixed up after diffing. This produces better looking
results in more cases, and makes the primary diffing faster.

+ Fixed some corner cases in the line parser where it would match up
+ isolated newline characters as unchanged, leading to weird alignment.

## 0.15 (released 6 January 2022)

### Parsing
src/hunks.rs: 1 addition & 1 deletion
@@ -663,7 +663,7 @@ pub fn aligned_lines_from_hunk(
max_lhs_src_line: LineNumber,
max_rhs_src_line: LineNumber,
) -> Vec<(Option<LineNumber>, Option<LineNumber>)> {
- let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = hunk.lines.clone();
+ let hunk_lines: Vec<(Option<LineNumber>, Option<LineNumber>)> = dbg!(hunk.lines.clone());

// TODO: this largely duplicates add_context().
let before_context = calculate_before_context(&hunk_lines, opposite_to_lhs, opposite_to_rhs);
src/line_parser.rs: 41 additions & 21 deletions
@@ -130,31 +130,35 @@ pub fn change_positions(lhs_src: &str, rhs_src: &str) -> Vec<MatchedPos> {
for diff_res in diff::slice(&split_words(&lhs_part), &split_words(&rhs_part)) {
match diff_res {
diff::Result::Left(lhs_word) => {
- let lhs_pos =
- lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
- res.push(MatchedPos {
- // TODO: rename this kind to reflect
- // that it's used for both code
- // comments and plain text.
- kind: MatchKind::ChangedCommentPart {},
- pos: lhs_pos[0],
- });
+ if lhs_word != "\n" {
+ let lhs_pos =
+ lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
+ res.push(MatchedPos {
+ // TODO: rename this kind to reflect
+ // that it's used for both code
+ // comments and plain text.
+ kind: MatchKind::ChangedCommentPart {},
+ pos: lhs_pos[0],
+ });
+ }

lhs_offset += lhs_word.len();
}
diff::Result::Both(lhs_word, rhs_word) => {
- let lhs_pos =
- lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
- let rhs_pos =
- rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
-
- res.push(MatchedPos {
- kind: MatchKind::UnchangedCommentPart {
- self_pos: lhs_pos[0],
- opposite_pos: rhs_pos,
- },
- pos: lhs_pos[0],
- });
+ if lhs_word != "\n" {
+ let lhs_pos =
+ lhs_nlp.from_offsets(lhs_offset, lhs_offset + lhs_word.len());
+ let rhs_pos =
+ rhs_nlp.from_offsets(rhs_offset, rhs_offset + rhs_word.len());
+
+ res.push(MatchedPos {
+ kind: MatchKind::UnchangedCommentPart {
+ self_pos: lhs_pos[0],
+ opposite_pos: rhs_pos,
+ },
+ pos: lhs_pos[0],
+ });
+ }

lhs_offset += lhs_word.len();
rhs_offset += rhs_word.len();
@@ -191,6 +195,22 @@ mod tests {
assert!(!positions[0].kind.is_change());
}

+ #[test]
+ fn test_no_changes_trailing_newlines() {
+ let positions = change_positions("foo\n", "foo\n");
+
+ assert_eq!(positions.len(), 1);
+ assert!(!positions[0].kind.is_change());
+ }
+
+ #[test]
+ fn test_novel_lhs_trailing_newlines() {
+ let positions = change_positions("foo\n", "");
+
+ assert_eq!(positions.len(), 1);
+ assert!(positions[0].kind.is_change());
+ }

#[test]
fn test_positions_novel_lhs() {
let positions = change_positions("foo", "");
