Skip to content

Commit

Permalink
Index out of range panic in DiffCharsToLines on large JSON diff
Browse files Browse the repository at this point in the history
  • Loading branch information
r-pai authored and sergi committed Dec 1, 2020
1 parent f6725a1 commit f9beae7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 27 deletions.
23 changes: 11 additions & 12 deletions diffmatchpatch/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,13 +392,13 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
// DiffLinesToChars splits two texts into a list of strings, and reduces the texts to a string of hashes where each Unicode character represents one line.
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
return string(chars1), string(chars2), lineArray
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return chars1, chars2, lineArray
}

// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
// DiffLinesToRunes splits two texts into a list of runes.
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
chars1, chars2, lineArray := dmp.DiffLinesToStrings(text1, text2)
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return []rune(chars1), []rune(chars2), lineArray
}

Expand Down Expand Up @@ -1308,8 +1308,8 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di
return diffs, nil
}

// DiffLinesToStrings splits two texts into a list of strings. Each string represents one line.
func (dmp *DiffMatchPatch) DiffLinesToStrings(text1, text2 string) (string, string, []string) {
// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'

Expand All @@ -1324,14 +1324,13 @@ func (dmp *DiffMatchPatch) DiffLinesToStrings(text1, text2 string) (string, stri
return str1, str2, lineArray
}

// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []rune where each Unicode character represents one line.
// We use strings instead of []runes as input mainly because you can't use []rune as a map key.
// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []string {
// Walk the text, pulling out a substring for each line. text.split('\n') would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
lineStart := 0
lineEnd := -1
strings := []string{}
strs := []string{}

for lineEnd < len(text)-1 {
lineEnd = indexOf(text, "\n", lineStart)
Expand All @@ -1345,13 +1344,13 @@ func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]str
lineValue, ok := lineHash[line]

if ok {
strings = append(strings, strconv.Itoa(lineValue))
strs = append(strs, strconv.Itoa(lineValue))
} else {
*lineArray = append(*lineArray, line)
lineHash[line] = len(*lineArray) - 1
strings = append(strings, strconv.Itoa(len(*lineArray)-1))
strs = append(strs, strconv.Itoa(len(*lineArray)-1))
}
}

return strings
return strs
}
15 changes: 0 additions & 15 deletions diffmatchpatch/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1500,21 +1500,6 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) {
}
}

func BenchmarkDiffMainStringsLargeLines(b *testing.B) {
s1, s2 := speedtestTexts()

dmp := New()

b.ResetTimer()

for i := 0; i < b.N; i++ {
text1, text2, linearray := dmp.DiffLinesToStrings(s1, s2)

diffs := dmp.DiffMain(text1, text2, false)
diffs = dmp.DiffCharsToLines(diffs, linearray)
}
}

func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) {
fp, _ := os.Open("../testdata/diff10klinestest.txt")
defer fp.Close()
Expand Down

0 comments on commit f9beae7

Please sign in to comment.