diff --git a/diffmatchpatch/patch.go b/diffmatchpatch/patch.go
index 0dbe3bd..275d918 100644
--- a/diffmatchpatch/patch.go
+++ b/diffmatchpatch/patch.go
@@ -11,6 +11,7 @@ package diffmatchpatch
 import (
 	"bytes"
 	"errors"
+	"fmt"
 	"math"
 	"net/url"
 	"regexp"
@@ -31,26 +32,8 @@ type Patch struct {
 // Header: @@ -382,8 +481,9 @@
 // Indices are printed as 1-based, not 0-based.
 func (p *Patch) String() string {
-	var coords1, coords2 string
-
-	if p.Length1 == 0 {
-		coords1 = strconv.Itoa(p.Start1) + ",0"
-	} else if p.Length1 == 1 {
-		coords1 = strconv.Itoa(p.Start1 + 1)
-	} else {
-		coords1 = strconv.Itoa(p.Start1+1) + "," + strconv.Itoa(p.Length1)
-	}
-
-	if p.Length2 == 0 {
-		coords2 = strconv.Itoa(p.Start2) + ",0"
-	} else if p.Length2 == 1 {
-		coords2 = strconv.Itoa(p.Start2 + 1)
-	} else {
-		coords2 = strconv.Itoa(p.Start2+1) + "," + strconv.Itoa(p.Length2)
-	}
-
 	var text bytes.Buffer
-	_, _ = text.WriteString("@@ -" + coords1 + " +" + coords2 + " @@\n")
+	_, _ = text.WriteString(p.header())
 
 	// Escape the body of the patch with %xx notation.
 	for _, aDiff := range p.diffs {
@@ -70,6 +53,28 @@ func (p *Patch) String() string {
 	return unescaper.Replace(text.String())
 }
 
+// header renders the hunk header line, e.g. "@@ -382,8 +481,9 @@\n", from the
+// start offsets and lengths of both sides of the patch.
+func (p *Patch) header() string {
+	return fmt.Sprintf("@@ -%s +%s @@\n",
+		p.coords(p.Start1, p.Length1),
+		p.coords(p.Start2, p.Length2))
+}
+
+// coords formats one side of a hunk header. start is 0-based and is printed
+// 1-based, except for an empty range, which is printed as "start,0" with start
+// left unchanged. A length of 1 is left implicit.
+func (*Patch) coords(start, length int) string {
+	switch length {
+	case 0:
+		return fmt.Sprintf("%d,0", start)
+	case 1:
+		return strconv.Itoa(start + 1)
+	default:
+		return fmt.Sprintf("%d,%d", start+1, length)
+	}
+}
+
 // PatchAddContext increases the context until it is unique, but doesn't let the pattern expand beyond MatchMaxBits.
 func (dmp *DiffMatchPatch) PatchAddContext(patch Patch, text string) Patch {
 	if len(text) == 0 {
diff --git a/diffmatchpatch/unified.go b/diffmatchpatch/unified.go
new file mode 100644
index 0000000..ad064a7
--- /dev/null
+++ b/diffmatchpatch/unified.go
@@ -0,0 +1,435 @@
+package diffmatchpatch
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Unified computes the differences between text1 and text2 line by line and
+// formats them in the "unified diff" format. Identical texts produce the
+// empty string.
+// Optionally pass UnifiedOption to set the new/old labels and context lines.
+func (dmp *DiffMatchPatch) Unified(text1, text2 string, opts ...UnifiedOption) string {
+	// Encode each distinct line as one codepoint so DiffMain compares whole
+	// lines, then expand the result back into the lines it stands for.
+	text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2)
+
+	diffs := dmp.DiffMain(text1Enc, text2Enc, false)
+	diffs = dmp.DiffCharsToLines(diffs, lines)
+
+	return dmp.DiffUnified(diffs, opts...)
+}
+
+// DiffUnified formats the diffs slice in the "unified diff" format. The diffs
+// do not need to be line-based; they are regrouped into whole lines first.
+// An input without insertions or deletions produces the empty string.
+// Optionally pass UnifiedOption to set the new/old labels and context lines.
+func (dmp *DiffMatchPatch) DiffUnified(diffs []Diff, opts ...UnifiedOption) string {
+	options := newUnifiedOptions(opts)
+
+	return newUnified(diffs, options).String()
+}
+
+// newUnified takes a []Diff slice and converts it into a unified struct, which
+// can then be used to produce the unified diff output using its String()
+// method.
+func newUnified(diffs []Diff, opts unifiedOptions) unified {
+	return unified{
+		label1: opts.text1Label,
+		label2: opts.text2Label,
+
+		patches: patchMakeUnified(diffs, opts.contextLines),
+	}
+}
+
+// patchMakeUnified regroups diffs into whole lines and collects them into hunks
+// with up to contextLines lines of context on each side. Changes separated by
+// at most 2*contextLines unchanged lines share a hunk. Equal inputs yield nil.
+func patchMakeUnified(diffs []Diff, contextLines int) []Patch {
+	maxCtx := contextLines * 2
+
+	var patches []Patch
+
+	if diffIsEqual(diffs) {
+		return nil
+	}
+
+	diffs = diffLinewise(diffs)
+
+	var (
+		patch Patch
+
+		lineNo1 int
+		lineNo2 int
+		context []Diff
+	)
+	for _, diff := range diffs {
+		switch diff.Type {
+		case DiffDelete:
+			lineNo1++
+		case DiffInsert:
+			lineNo2++
+		case DiffEqual:
+			lineNo1++
+			lineNo2++
+		}
+
+		if diff.Type == DiffEqual {
+			context = append(context, diff)
+			continue
+		}
+
+		// Too much context to bridge: close the previous hunk with trailing context.
+		if len(patch.diffs) != 0 && len(context) > maxCtx {
+			cl := min(len(context), contextLines)
+
+			patch.diffs = append(patch.diffs, context[:cl]...)
+
+			patchUpdateLength(&patch)
+
+			patches = append(patches, patch)
+			patch = Patch{}
+		}
+
+		// Start a new hunk, led by up to contextLines lines of context.
+		if len(patch.diffs) == 0 {
+			cl := min(len(context), contextLines)
+
+			l1 := lineNo1 - cl
+			l2 := lineNo2 - cl
+
+			// When starting a new patch, the line number for lineNo1 XOR lineNo2
+			// has already been advanced, but not the other. Account for that in
+			// l1 or l2.
+			switch diff.Type {
+			case DiffDelete:
+				l1--
+			case DiffInsert:
+				l2--
+			}
+
+			patch = Patch{
+				Start1: l1,
+				Start2: l2,
+				diffs:  context[len(context)-cl:],
+			}
+
+			context = nil
+		}
+
+		patch.diffs = append(patch.diffs, context...)
+		context = nil
+
+		patch.diffs = append(patch.diffs, diff)
+	}
+
+	// Close the last hunk with whatever trailing context is available.
+	if len(patch.diffs) != 0 {
+		cl := min(len(context), contextLines)
+
+		patch.diffs = append(patch.diffs, context[:cl]...)
+
+		patchUpdateLength(&patch)
+
+		patches = append(patches, patch)
+	}
+
+	return patches
+}
+
+// patchUpdateLength recomputes p.Length1 and p.Length2, counting one line per diff in p.diffs.
+func patchUpdateLength(p *Patch) {
+	p.Length1 = 0
+	p.Length2 = 0
+
+	for _, diff := range p.diffs {
+		switch diff.Type {
+		case DiffDelete:
+			p.Length1++
+		case DiffInsert:
+			p.Length2++
+		case DiffEqual:
+			p.Length1++
+			p.Length2++
+		}
+	}
+}
+
+// diffIsEqual reports whether diffs contains no insertions or deletions.
+func diffIsEqual(diffs []Diff) bool {
+	for _, diff := range diffs {
+		if diff.Type != DiffEqual {
+			return false
+		}
+	}
+
+	return true
+}
+
+// diffLinewise splits and merges diffs so that each individual diff represents one line, including the final newline character.
+func diffLinewise(diffs []Diff) []Diff {
+	var (
+		ret          []Diff
+		line1, line2 string
+	)
+
+	diffs = diffCleanupNewline(diffs)
+
+	add := func(d Diff) {
+		switch d.Type {
+		case DiffDelete:
+			line1 += d.Text
+		case DiffInsert:
+			line2 += d.Text
+		default: // equal
+			line1 += d.Text
+			line2 += d.Text
+		}
+
+		if strings.HasSuffix(line1, "\n") && line1 == line2 {
+			ret = append(ret, Diff{
+				Type: DiffEqual,
+				Text: line1,
+			})
+
+			line1, line2 = "", ""
+		}
+
+		if strings.HasSuffix(line1, "\n") {
+			ret = append(ret, Diff{
+				Type: DiffDelete,
+				Text: line1,
+			})
+
+			line1 = ""
+		}
+
+		if strings.HasSuffix(line2, "\n") {
+			ret = append(ret, Diff{
+				Type: DiffInsert,
+				Text: line2,
+			})
+
+			line2 = ""
+		}
+	}
+
+	for _, diff := range diffs {
+		for _, segment := range strings.SplitAfter(diff.Text, "\n") {
+			add(Diff{
+				Type: diff.Type,
+				Text: segment,
+			})
+		}
+	}
+
+	// line1 and/or line2 may be non-empty if there is no newline at the end of file.
+	if line1 != "" && line1 == line2 {
+		ret = append(ret, Diff{
+			Type: DiffEqual,
+			Text: line1,
+		})
+
+		line1, line2 = "", ""
+	}
+
+	if line1 != "" {
+		ret = append(ret, Diff{
+			Type: DiffDelete,
+			Text: line1,
+		})
+	}
+
+	if line2 != "" {
+		ret = append(ret, Diff{
+			Type: DiffInsert,
+			Text: line2,
+		})
+	}
+
+	return reorderDeletionsFirst(ret)
+}
+
+// diffCleanupNewline looks for single edits surrounded on both sides by equalities which can be shifted sideways to align on newlines.
+func diffCleanupNewline(diffs []Diff) []Diff {
+	var ret []Diff
+
+	for i := 0; i < len(diffs); i++ {
+		if i < len(diffs)-2 && diffs[i].Type == DiffEqual && diffs[i+1].Type != DiffEqual && diffs[i+2].Type == DiffEqual {
+			common := prefixWithNewline(diffs[i+1].Text, diffs[i+2].Text)
+
+			// Convert ["=A", "±CX", "=CY"]
+			// to      ["=AC", "±XC", "=Y"], where C is the common prefix.
+			if common != "" {
+				ret = append(ret,
+					Diff{
+						Type: DiffEqual,
+						Text: diffs[i].Text + common,
+					},
+					Diff{
+						Type: diffs[i+1].Type,
+						Text: strings.TrimPrefix(diffs[i+1].Text, common) + common,
+					},
+					Diff{
+						Type: DiffEqual,
+						Text: strings.TrimPrefix(diffs[i+2].Text, common),
+					},
+				)
+
+				i += 2
+				continue
+			}
+		}
+
+		ret = append(ret, diffs[i])
+	}
+
+	return ret
+}
+
+// prefixWithNewline returns the longest common prefix between text1 and text2, up to and including a newline character.
+// If text1 and text2 do not have a common prefix, or the common prefix does not include a newline character, the empty string is returned.
+func prefixWithNewline(text1, text2 string) string {
+	// Compare bytes directly: the result is sliced by byte offset, and '\n'
+	// never occurs inside a multi-byte UTF-8 sequence.
+	n := 0
+	for n < len(text1) && n < len(text2) && text1[n] == text2[n] {
+		n++
+	}
+
+	return text1[:strings.LastIndex(text1[:n], "\n")+1]
+}
+
+// reorderDeletionsFirst reorders changes so that deletions come before insertions, without crossing an equality boundary.
+func reorderDeletionsFirst(diffs []Diff) []Diff {
+	var out, removed, added []Diff
+
+	// flush emits the pending run of changes, deletions ahead of insertions,
+	// and starts a new run.
+	flush := func() {
+		out = append(out, removed...)
+		out = append(out, added...)
+		removed, added = nil, nil
+	}
+
+	for _, d := range diffs {
+		switch d.Type {
+		case DiffDelete:
+			removed = append(removed, d)
+		case DiffInsert:
+			added = append(added, d)
+		case DiffEqual:
+			// An equality ends the current run and is never crossed.
+			flush()
+			out = append(out, d)
+		}
+	}
+
+	// Changes after the last equality still need to be emitted.
+	flush()
+
+	return out
+}
+
+// unified holds the two file labels and the hunks of one unified diff.
+type unified struct {
+	label1, label2 string
+
+	patches []Patch
+}
+
+// String renders the diff in the standard textual form: both file labels, then
+// every hunk. It returns "" without hunks. Tools like patch accept the output.
+func (u unified) String() string {
+	if len(u.patches) == 0 {
+		return ""
+	}
+
+	var out strings.Builder
+	fmt.Fprintf(&out, "--- %s\n", u.label1)
+	fmt.Fprintf(&out, "+++ %s\n", u.label2)
+
+	for i := range u.patches {
+		out.WriteString(patchFormatUnified(u.patches[i]))
+	}
+
+	return out.String()
+}
+
+// patchFormatUnified implements GNU's unified diff format.
+// This differs from Patch.String() in that this function assumes that each Diff
+// (except possibly the last ones) ends in a newline. If either input does not
+// end with a newline character, an appropriate message will be printed.
+// The output is not URL encoded.
+func patchFormatUnified(p Patch) string {
+	var b strings.Builder
+	b.WriteString(p.header())
+
+	for _, diff := range p.diffs {
+		switch diff.Type {
+		case DiffDelete:
+			b.WriteByte('-')
+		case DiffInsert:
+			b.WriteByte('+')
+		case DiffEqual:
+			b.WriteByte(' ')
+		}
+		b.WriteString(diff.Text)
+
+		// A line lacking its newline gets one, followed by the usual marker.
+		if !strings.HasSuffix(diff.Text, "\n") {
+			b.WriteString("\n\\ No newline at end of file\n")
+		}
+	}
+
+	return b.String()
+}
+
+// DefaultContextLines is the number of unchanged lines of surrounding
+// context displayed by Unified.
+const DefaultContextLines = 3
+
+// UnifiedOption is an option for Unified() and DiffUnified().
+type UnifiedOption func(*unifiedOptions)
+
+// unifiedOptions collects the settings that UnifiedOption values modify.
+type unifiedOptions struct {
+	contextLines int
+	text1Label   string
+	text2Label   string
+}
+
+// newUnifiedOptions applies opts, in order, on top of the default settings.
+func newUnifiedOptions(opts []UnifiedOption) unifiedOptions {
+	ret := unifiedOptions{
+		contextLines: DefaultContextLines,
+		text1Label:   "text1",
+		text2Label:   "text2",
+	}
+
+	for _, o := range opts {
+		if o != nil { // tolerate nil options instead of panicking
+			o(&ret)
+		}
+	}
+
+	return ret
+}
+
+// UnifiedContextLines sets the number of unchanged lines of surrounding context
+// printed. Values <= 0 select DefaultContextLines, which is also the default.
+func UnifiedContextLines(lines int) UnifiedOption {
+	if lines <= 0 {
+		lines = DefaultContextLines
+	}
+
+	return func(o *unifiedOptions) { o.contextLines = lines }
+}
+
+// UnifiedLabels sets the labels for the old and new files. Defaults to "text1" and "text2".
+func UnifiedLabels(oldLabel, newLabel string) UnifiedOption { + return func(o *unifiedOptions) { + o.text1Label = oldLabel + o.text2Label = newLabel + } +} diff --git a/diffmatchpatch/unified_test.go b/diffmatchpatch/unified_test.go new file mode 100644 index 0000000..47270a9 --- /dev/null +++ b/diffmatchpatch/unified_test.go @@ -0,0 +1,274 @@ +package diffmatchpatch_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/sergi/go-diff/diffmatchpatch" +) + +func TestDiffUnified(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + text1 string + text2 string + want string + }{ + { + name: "No changes", + text1: "Hello, world!\n", + text2: "Hello, world!\n", + want: "", + }, + { + name: "Insertion at beginning", + text1: "Hello, world!\n", + text2: "New line\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1 +1,2 @@\n+New line\n Hello, world!\n", + }, + { + name: "Insertion at end", + text1: "Hello, world!\n", + text2: "Hello, world!\nNew line\n", + want: "--- text1\n+++ text2\n@@ -1 +1,2 @@\n Hello, world!\n+New line\n", + }, + { + name: "Insertion middle", + text1: "Hello, world!\nHello, world!\n", + text2: "Hello, world!\nNew line\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1,3 @@\n Hello, world!\n+New line\n Hello, world!\n", + }, + { + name: "Removal at beginning", + text1: "Old line\nHello, world!\n", + text2: "Hello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1 @@\n-Old line\n Hello, world!\n", + }, + { + name: "Removal at end", + text1: "Hello, world!\nOld line\n", + text2: "Hello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1 @@\n Hello, world!\n-Old line\n", + }, + { + name: "Removal middle", + text1: "Hello, world!\nOld line\nHello, world!\n", + text2: "Hello, world!\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,3 +1,2 @@\n Hello, world!\n-Old line\n Hello, world!\n", + }, + { + name: "Replacement", + text1: "Prefix\nHello, world!\nSuffix\n", + text2: 
"Prefix\nHello, Golang!\nSuffix\n", + want: "--- text1\n+++ text2\n@@ -1,3 +1,3 @@\n Prefix\n-Hello, world!\n+Hello, Golang!\n Suffix\n", + }, + { + name: "Insertion", + text1: makeContext(10, 0), + text2: makeContext(5, 0) + "INSERTION\n" + makeContext(5, 5), + want: "--- text1\n+++ text2\n@@ -3,6 +3,7 @@\n context2\n context3\n context4\n+INSERTION\n context5\n context6\n context7\n", + }, + { + name: "Multiple hunks", + text1: makeContext(20, 0), + text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(10, 5) + "INSERTION2\n" + makeContext(5, 15), + want: `--- text1 ++++ text2 +@@ -3,6 +3,7 @@ + context2 + context3 + context4 ++INSERTION1 + context5 + context6 + context7 +@@ -13,6 +14,7 @@ + context12 + context13 + context14 ++INSERTION2 + context15 + context16 + context17 +`, + }, + { + name: "Merge hunk with <= 5 lines of context", + text1: makeContext(15, 0), + text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(5, 5) + "INSERTION2\n" + makeContext(5, 10), + want: `--- text1 ++++ text2 +@@ -3,11 +3,13 @@ + context2 + context3 + context4 ++INSERTION1 + context5 + context6 + context7 + context8 + context9 ++INSERTION2 + context10 + context11 + context12 +`, + }, + { + name: "Insert without newline", + text1: "context1", + text2: "context1\nnew line", + want: `--- text1 ++++ text2 +@@ -1 +1,2 @@ +-context1 +\ No newline at end of file ++context1 ++new line +\ No newline at end of file +`, + }, + { + name: "Removal without newline", + text1: "context1\nold line", + text2: "context1", + want: `--- text1 ++++ text2 +@@ -1,2 +1 @@ +-context1 +-old line +\ No newline at end of file ++context1 +\ No newline at end of file +`, + }, + { + name: "context without newline", + text1: "context0\nold1\ncontext1", + text2: "context0\nnew1\ncontext1", + want: `--- text1 ++++ text2 +@@ -1,3 +1,3 @@ + context0 +-old1 ++new1 + context1 +\ No newline at end of file +`, + }, + { + name: "Replace multiple subsequent lines", + text1: makeContext(5, 0) + "old1\nold2\nold3\n" + 
makeContext(5, 5), + text2: makeContext(5, 0) + "new1\nnew2\nnew3\n" + makeContext(5, 5), + want: `--- text1 ++++ text2 +@@ -3,9 +3,9 @@ + context2 + context3 + context4 +-old1 +-old2 +-old3 ++new1 ++new2 ++new3 + context5 + context6 + context7 +`, + }, + { + name: "empty text1", + text1: "", + text2: "new1\n", + want: `--- text1 ++++ text2 +@@ -0,0 +1 @@ ++new1 +`, + }, + { + name: "empty text2", + text1: "old1\n", + text2: "", + want: `--- text1 ++++ text2 +@@ -1 +0,0 @@ +-old1 +`, + }, + } + + for _, tc := range cases { + // Un-alias tc for compatibility with Go <1.22. + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + dmp := diffmatchpatch.New() + + got := dmp.Unified(tc.text1, tc.text2, diffmatchpatch.UnifiedLabels("text1", "text2")) + + t.Logf("dmp.Unified() =\n%s", got) + + if got != tc.want { + t.Errorf("Unified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got)) + } + + // DiffLinesToChars / DiffCharsToLines is not required for correct results. + diffs := dmp.DiffMain(tc.text1, tc.text2, false) + + got = dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("text1", "text2"), diffmatchpatch.UnifiedContextLines(3)) + if got != tc.want { + t.Errorf("DiffUnified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got)) + } + + }) + } +} + +func makeContext(n, start int) string { + var b strings.Builder + + for i := start; i < start+n; i++ { + fmt.Fprintf(&b, "context%d\n", i) + } + + return b.String() +} + +func ExampleDiffMatchPatch_DiffUnified() { + text1 := "Prefix\nHello, world!\nSuffix\n" + text2 := "Prefix\nHello, Golang!\nSuffix\n" + + dmp := diffmatchpatch.New() + + // Pre-process the inputs so that each codepoint in text[12]Enc represents one line. + text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2) + + // Run the diff algorithm on the preprocessed inputs. + diffs := dmp.DiffMain(text1Enc, text2Enc, false) + + // Expand the diffs back into the full lines they represent. 
+ diffs = dmp.DiffCharsToLines(diffs, lines) + + // Format as unified diff. + unifiedDiff := dmp.DiffUnified(diffs, + diffmatchpatch.UnifiedLabels("old.txt", "new.txt"), + diffmatchpatch.UnifiedContextLines(3)) + + fmt.Print(unifiedDiff) + // Output: + // --- old.txt + // +++ new.txt + // @@ -1,3 +1,3 @@ + // Prefix + // -Hello, world! + // +Hello, Golang! + // Suffix +} diff --git a/go.mod b/go.mod index c7886ce..23378c1 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module github.com/sergi/go-diff require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/google/go-cmp v0.6.0 github.com/kr/pretty v0.1.0 // indirect github.com/stretchr/testify v1.4.0 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect diff --git a/go.sum b/go.sum index 8dd9f36..737e654 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=