From f9beae76ac64a6f92ee8b73e1c96c71ef01a8d49 Mon Sep 17 00:00:00 2001 From: r-pai Date: Thu, 21 Nov 2019 11:19:22 +0530 Subject: [PATCH] Index out of range panic in DiffCharsToLines on large JSON diff --- diffmatchpatch/diff.go | 23 +++++++++++------------ diffmatchpatch/diff_test.go | 15 --------------- 2 files changed, 11 insertions(+), 27 deletions(-) diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go index b745f33..63a22a6 100644 --- a/diffmatchpatch/diff.go +++ b/diffmatchpatch/diff.go @@ -392,13 +392,13 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int, // DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line. // It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes. func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) { - chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2) - return string(chars1), string(chars2), lineArray + chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2) + return chars1, chars2, lineArray } -// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line. +// DiffLinesToRunes splits two texts into a list of runes. func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) { - chars1, chars2, lineArray := dmp.DiffLinesToStrings(text1, text2) + chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2) return []rune(chars1), []rune(chars2), lineArray } @@ -1308,8 +1308,8 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di return diffs, nil } -// DiffLinesToStrings splits two texts into a list of strings. Each string represents one line. -func (dmp *DiffMatchPatch) DiffLinesToStrings(text1, text2 string) (string, string, []string) { +// diffLinesToStrings splits two texts into a list of strings. Each string represents one line. +func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) { // '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character. lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n' @@ -1324,14 +1324,13 @@ func (dmp *DiffMatchPatch) DiffLinesToStrings(text1, text2 string) (string, stri return str1, str2, lineArray } -// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []rune where each Unicode character represents one line. -// We use strings instead of []runes as input mainly because you can't use []rune as a map key. +// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string. func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []string { // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect. lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4 lineStart := 0 lineEnd := -1 - strings := []string{} + strs := []string{} for lineEnd < len(text)-1 { lineEnd = indexOf(text, "\n", lineStart) @@ -1345,13 +1344,13 @@ func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]str lineValue, ok := lineHash[line] if ok { - strings = append(strings, strconv.Itoa(lineValue)) + strs = append(strs, strconv.Itoa(lineValue)) } else { *lineArray = append(*lineArray, line) lineHash[line] = len(*lineArray) - 1 - strings = append(strings, strconv.Itoa(len(*lineArray)-1)) + strs = append(strs, strconv.Itoa(len(*lineArray)-1)) } } - return strings + return strs } diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go index 2d6b60b..7fea3d7 100644 --- a/diffmatchpatch/diff_test.go +++ b/diffmatchpatch/diff_test.go @@ -1500,21 +1500,6 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) { } } -func BenchmarkDiffMainStringsLargeLines(b *testing.B) { - s1, s2 := speedtestTexts() - - dmp := New() - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - text1, text2, linearray := dmp.DiffLinesToStrings(s1, s2) - - diffs := dmp.DiffMain(text1, text2, false) - diffs = dmp.DiffCharsToLines(diffs, linearray) - } -} - func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) { fp, _ := os.Open("../testdata/diff10klinestest.txt") defer fp.Close()