From 6f90939c9ae34a98073ea8c790bb128f08d7935d Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Sat, 23 Nov 2013 19:56:54 +0100 Subject: [PATCH] difflib: implement context diffs --- README.md | 3 +- difflib/difflib.go | 117 ++++++++++++++++++++++++++++++++++++++++ difflib/difflib_test.go | 105 ++++++++++++++++++++++++++++++++++++ 3 files changed, 224 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fe7b1e3..aa7dba9 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ The following class and functions have be ported: * `SequenceMatcher` * `unified_diff()` +* `context_diff()` -Related doctests have been ported as well. +Related doctests and unittests have been ported as well. I have barely used to code yet so do not consider it being production-ready. The API is likely to evolve too. diff --git a/difflib/difflib.go b/difflib/difflib.go index 396bcc2..ec0b826 100644 --- a/difflib/difflib.go +++ b/difflib/difflib.go @@ -626,3 +626,120 @@ func GetUnifiedDiffString(diff UnifiedDiff) (string, error) { err := WriteUnifiedDiff(w, diff) return string(w.Bytes()), err } + +// Convert range to the "ed" format. +func formatRangeContext(start, stop int) string { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning := start + 1 // lines start numbering with one + length := stop - start + if length == 0 { + beginning -= 1 // empty ranges begin at line just before the range + } + if length <= 1 { + return fmt.Sprintf("%d", beginning) + } + return fmt.Sprintf("%d,%d", beginning, beginning+length-1) +} + +type ContextDiff UnifiedDiff + +// Compare two sequences of lines; generate the delta as a context diff. +// +// Context diffs are a compact way of showing line changes and a few +// lines of context. The number of context lines is set by diff.Context +// which defaults to three. +// +// By default, the diff control lines (those with *** or ---) are +// created with a trailing newline. +// +// For inputs that do not have trailing newlines, set the diff.Eol +// argument to "" so that the output will be uniformly newline free. +// +// The context diff format normally has a header for filenames and +// modification times. Any or all of these may be specified using +// strings for diff.FromFile, diff.ToFile, diff.FromDate, diff.ToDate. +// The modification times are normally expressed in the ISO 8601 format. +// If not specified, the strings default to blanks. +func WriteContextDiff(writer io.Writer, diff ContextDiff) error { + buf := bufio.NewWriter(writer) + defer buf.Flush() + var diffErr error + w := func(format string, args ...interface{}) { + _, err := buf.WriteString(fmt.Sprintf(format, args...)) + if diffErr == nil && err != nil { + diffErr = err + } + } + + if len(diff.Eol) == 0 { + diff.Eol = "\n" + } + + prefix := map[byte]string{ + 'i': "+ ", + 'd': "- ", + 'r': "! ", + 'e': " ", + } + + started := false + m := NewMatcher(diff.A, diff.B) + for _, g := range m.GetGroupedOpCodes(diff.Context) { + if !started { + started = true + fromDate := "" + if len(diff.FromDate) > 0 { + fromDate = "\t" + diff.FromDate + } + toDate := "" + if len(diff.ToDate) > 0 { + toDate = "\t" + diff.ToDate + } + w("*** %s%s%s", diff.FromFile, fromDate, diff.Eol) + w("--- %s%s%s", diff.ToFile, toDate, diff.Eol) + } + + first, last := g[0], g[len(g)-1] + w("***************" + diff.Eol) + + range1 := formatRangeContext(first.I1, last.I2) + w("*** %s ****%s", range1, diff.Eol) + for _, c := range g { + if c.Tag == 'r' || c.Tag == 'd' { + for _, cc := range g { + if cc.Tag == 'i' { + continue + } + for _, line := range diff.A[cc.I1:cc.I2] { + w(prefix[cc.Tag] + line) + } + } + break + } + } + + range2 := formatRangeContext(first.J1, last.J2) + w("--- %s ----%s", range2, diff.Eol) + for _, c := range g { + if c.Tag == 'r' || c.Tag == 'i' { + for _, cc := range g { + if cc.Tag == 'd' { + continue + } + for _, line := range diff.B[cc.J1:cc.J2] { + w(prefix[cc.Tag] + line) + } + } + break + } + } + } + return diffErr +} + +// Like WriteContextDiff but returns the diff a string. +func GetContextDiffString(diff ContextDiff) (string, error) { + w := &bytes.Buffer{} + err := WriteContextDiff(w, diff) + return string(w.Bytes()), err +} diff --git a/difflib/difflib_test.go b/difflib/difflib_test.go index 62ce78f..d36c257 100644 --- a/difflib/difflib_test.go +++ b/difflib/difflib_test.go @@ -148,6 +148,46 @@ four` } } +func TestContextDiff(t *testing.T) { + a := `one +two +three +four` + b := `zero +one +tree +four` + diff := ContextDiff{ + A: splitLines(a), + B: splitLines(b), + FromFile: "Original", + ToFile: "Current", + Context: 3, + Eol: "\n", + } + result, err := GetContextDiffString(diff) + assertEqual(t, err, nil) + expected := `*** Original +--- Current +*************** +*** 1,4 **** + one +! two +! three + four +--- 1,4 ---- ++ zero + one +! tree + four +` + // TABs are a pain to preserve through editors + expected = strings.Replace(expected, "\\t", "\t", -1) + if expected != result { + t.Errorf("unexpected diff result:\n%s", result) + } +} + func rep(s string, count int) string { return strings.Repeat(s, count) } @@ -232,3 +272,68 @@ func TestOutputFormatRangeFormatUnified(t *testing.T) { assertEqual(t, fm(3, 6), "4,3") assertEqual(t, fm(0, 0), "0,0") } + +func TestOutputFormatRangeFormatContext(t *testing.T) { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + // + // The range of lines in file1 shall be written in the following format + // if the range contains two or more lines: + // "*** %d,%d ****\n", , + // and the following format otherwise: + // "*** %d ****\n", + // The ending line number of an empty range shall be the number of the preceding line, + // or 0 if the range is at the start of the file. + // + // Next, the range of lines in file2 shall be written in the following format + // if the range contains two or more lines: + // "--- %d,%d ----\n", , + // and the following format otherwise: + // "--- %d ----\n", + fm := formatRangeContext + assertEqual(t, fm(3, 3), "3") + assertEqual(t, fm(3, 4), "4") + assertEqual(t, fm(3, 5), "4,5") + assertEqual(t, fm(3, 6), "4,6") + assertEqual(t, fm(0, 0), "0") +} + +func TestOutputFormatTabDelimiter(t *testing.T) { + diff := UnifiedDiff{ + A: splitChars("one"), + B: splitChars("two"), + FromFile: "Original", + FromDate: "2005-01-26 23:30:50", + ToFile: "Current", + ToDate: "2010-04-12 10:20:52", + Eol: "\n", + } + ud, err := GetUnifiedDiffString(diff) + assertEqual(t, err, nil) + assertEqual(t, splitLines(ud)[:2], []string{ + "--- Original\t2005-01-26 23:30:50\n", + "+++ Current\t2010-04-12 10:20:52\n", + }) + cd, err := GetContextDiffString(ContextDiff(diff)) + assertEqual(t, err, nil) + assertEqual(t, splitLines(cd)[:2], []string{ + "*** Original\t2005-01-26 23:30:50\n", + "--- Current\t2010-04-12 10:20:52\n", + }) +} + +func TestOutputFormatNoTrailingTabOnEmptyFiledate(t *testing.T) { + diff := UnifiedDiff{ + A: splitChars("one"), + B: splitChars("two"), + FromFile: "Original", + ToFile: "Current", + Eol: "\n", + } + ud, err := GetUnifiedDiffString(diff) + assertEqual(t, err, nil) + assertEqual(t, splitLines(ud)[:2], []string{"--- Original\n", "+++ Current\n"}) + + cd, err := GetContextDiffString(ContextDiff(diff)) + assertEqual(t, err, nil) + assertEqual(t, splitLines(cd)[:2], []string{"*** Original\n", "--- Current\n"}) +}