diff --git a/go.mod b/go.mod index 9525dcb38..b08446dd2 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/hashicorp/hcl/v2 v2.3.0 github.com/hashicorp/terraform-json v0.4.0 github.com/mitchellh/cli v1.0.0 + github.com/pmezard/go-difflib v1.0.0 github.com/sourcegraph/go-lsp v0.0.0-20200117082640-b19bb38222e2 github.com/zclconf/go-cty v1.2.1 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e diff --git a/internal/hcl/diff.go b/internal/hcl/diff.go new file mode 100644 index 000000000..e861aa712 --- /dev/null +++ b/internal/hcl/diff.go @@ -0,0 +1,120 @@ +package hcl + +import ( + "github.com/hashicorp/hcl/v2" + "github.com/hashicorp/terraform-ls/internal/filesystem" + "github.com/hashicorp/terraform-ls/internal/source" + "github.com/pmezard/go-difflib/difflib" +) + +type fileChange struct { + newText string + rng hcl.Range + opCode difflib.OpCode +} + +func (ch *fileChange) Text() string { + return ch.newText +} + +func (ch *fileChange) Range() hcl.Range { + return ch.rng +} + +const ( + OpReplace = 'r' + OpDelete = 'd' + OpInsert = 'i' + OpEqual = 'e' +) + +// Diff calculates difference between File's content +// and after byte sequence and returns it as filesystem.FileChanges +func Diff(f filesystem.File, after []byte) filesystem.FileChanges { + return diffLines(f.Filename(), + source.MakeSourceLines(f.Filename(), f.Text()), + source.MakeSourceLines(f.Filename(), after)) +} + +// diffLines calculates difference between two source.Lines +// and returns them as filesystem.FileChanges +func diffLines(filename string, beforeLines, afterLines source.Lines) filesystem.FileChanges { + context := 3 + + m := difflib.NewMatcher( + source.StringLines(beforeLines), + source.StringLines(afterLines)) + + changes := make(filesystem.FileChanges, 0) + + for _, group := range m.GetGroupedOpCodes(context) { + for _, c := range group { + beforeStart, beforeEnd := c.I1, c.I2 + afterStart, afterEnd := c.J1, c.J2 + + if c.Tag == OpEqual { + continue + } + + if c.Tag == OpReplace { + var rng hcl.Range + var newBytes []byte + + for i, line := range beforeLines[beforeStart:beforeEnd] { + if i == 0 { + rng = line.Range() + continue + } + rng.End = line.Range().End + } + + for _, line := range afterLines[afterStart:afterEnd] { + newBytes = append(newBytes, line.Bytes()...) + } + + changes = append(changes, &fileChange{ + newText: string(newBytes), + rng: rng, + }) + continue + } + + if c.Tag == OpDelete { + var deleteRng hcl.Range + for i, line := range beforeLines[beforeStart:beforeEnd] { + if i == 0 { + deleteRng = line.Range() + continue + } + deleteRng.End = line.Range().End + } + changes = append(changes, &fileChange{ + newText: "", + rng: deleteRng, + opCode: c, + }) + continue + } + + if c.Tag == OpInsert { + var insertRng hcl.Range + insertRng.Start = beforeLines[beforeStart-1].Range().End + insertRng.End = beforeLines[beforeStart-1].Range().End + var newBytes []byte + + for _, line := range afterLines[afterStart:afterEnd] { + newBytes = append(newBytes, line.Bytes()...) 
+ } + + changes = append(changes, &fileChange{ + newText: string(newBytes), + rng: insertRng, + }) + continue + } + + } + } + + return changes +} diff --git a/internal/hcl/diff_test.go b/internal/hcl/diff_test.go new file mode 100644 index 000000000..7c759896d --- /dev/null +++ b/internal/hcl/diff_test.go @@ -0,0 +1,201 @@ +package hcl + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/hashicorp/hcl/v2" + "github.com/hashicorp/terraform-ls/internal/filesystem" + "github.com/hashicorp/terraform-ls/internal/source" + "github.com/pmezard/go-difflib/difflib" +) + +func TestDiff(t *testing.T) { + testCases := []struct { + name string + beforeCfg, afterCfg string + expectedChanges filesystem.FileChanges + }{ + { + "no-op", + `aaa +bbb +ccc`, + `aaa +bbb +ccc`, + filesystem.FileChanges{}, + }, + { + "two separate lines replaced", + `resource "aws_vpc" "name" { + cidr_block = "sdf" + tags = { + "key" = "value" + sdfasd = 1 + s = 3 + } +}`, + `resource "aws_vpc" "name" { + cidr_block = "sdf" + tags = { + "key" = "value" + sdfasd = 1 + s = 3 + } +}`, + filesystem.FileChanges{ + &fileChange{ + newText: ` "key" = "value" +`, + rng: hcl.Range{ + Filename: "test.tf", + Start: hcl.Pos{Line: 4, Column: 1, Byte: 60}, + End: hcl.Pos{Line: 5, Column: 1, Byte: 80}, + }, + }, + &fileChange{ + newText: ` s = 3 +`, + rng: hcl.Range{ + Filename: "test.tf", + Start: hcl.Pos{Line: 6, Column: 1, Byte: 95}, + End: hcl.Pos{Line: 7, Column: 1, Byte: 105}, + }, + }, + }, + }, + { + "whitespace shrinking", + `resource "aws_vpc" "name" { + cidr_block = "sdf" + tags = { + "key" = "value" + sdfasd = 1 + s = 3 + + + } +}`, + `resource "aws_vpc" "name" { + cidr_block = "sdf" + tags = { + "key" = "value" + sdfasd = 1 + s = 3 + } +}`, + filesystem.FileChanges{ + &fileChange{ + newText: "", + rng: hcl.Range{ + Filename: "test.tf", + Start: hcl.Pos{Line: 7, Column: 1, Byte: 111}, + End: hcl.Pos{Line: 9, Column: 1, Byte: 113}, + }, + }, + }, + }, + { + "trailing whitespace removal", + `resource "aws_vpc" "name" { + +}`, + `resource "aws_vpc" "name" { + +}`, + filesystem.FileChanges{ + &fileChange{ + newText: "\n", + rng: hcl.Range{ + Filename: "test.tf", + Start: hcl.Pos{Line: 2, Column: 1, Byte: 28}, + End: hcl.Pos{Line: 3, Column: 1, Byte: 31}, + }, + }, + }, + }, + { + "new line insertion", + `resource "aws_vpc" "name" {}`, + `resource "aws_vpc" "name" { +}`, + filesystem.FileChanges{ + &fileChange{ + newText: `resource "aws_vpc" "name" { +}`, + rng: hcl.Range{ + Filename: "test.tf", + Start: hcl.Pos{Line: 1, Column: 1, Byte: 0}, + End: hcl.Pos{Line: 1, Column: 29, Byte: 28}, + }, + }, + }, + }, + { + "new line insertion at EOF", + `resource "aws_vpc" "name" { +} +`, + `resource "aws_vpc" "name" { +} + +`, + filesystem.FileChanges{ + &fileChange{ + newText: "\n", + rng: hcl.Range{ + Start: hcl.Pos{Line: 3, Column: 1, Byte: 30}, + End: hcl.Pos{Line: 3, Column: 1, Byte: 30}, + }, + }, + }, + }, + { + "line insertion", + `resource "aws_vpc" "name" { + attr1 = "one" + + attr3 = "three" +}`, + `resource "aws_vpc" "name" { + attr1 = "one" + attr2 = "two" + attr3 = "three" +}`, + filesystem.FileChanges{ + &fileChange{ + newText: ` attr2 = "two" +`, + rng: hcl.Range{ + Filename: "test.tf", + Start: hcl.Pos{Line: 3, Column: 1, Byte: 44}, + End: hcl.Pos{Line: 4, Column: 1, Byte: 45}, + }, + }, + }, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%d-%s", i, tc.name), func(t *testing.T) { + linesBefore := source.MakeSourceLines("test.tf", + 
[]byte(tc.beforeCfg)) + linesAfter := source.MakeSourceLines("test.tf", + []byte(tc.afterCfg)) + + changes := diffLines("test.tf", linesBefore, linesAfter) + + opts := cmp.Options{ + cmp.AllowUnexported(fileChange{}), + cmpopts.IgnoreTypes(difflib.OpCode{}), + } + + if diff := cmp.Diff(tc.expectedChanges, changes, opts...); diff != "" { + t.Fatalf("Changes don't match: %s", diff) + } + }) + } +} diff --git a/internal/hcl/hcl.go b/internal/hcl/hcl.go index 6b023d572..28c0ab4e6 100644 --- a/internal/hcl/hcl.go +++ b/internal/hcl/hcl.go @@ -1,9 +1,6 @@ package hcl import ( - "fmt" - - "github.com/hashicorp/hcl/v2" hcllib "github.com/hashicorp/hcl/v2" "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/terraform-ls/internal/filesystem" @@ -11,7 +8,6 @@ import ( type File interface { BlockAtPosition(filesystem.FilePosition) (*hcllib.Block, hcllib.Pos, error) - Diff([]byte) (filesystem.FileChanges, error) } type file struct { @@ -49,36 +45,6 @@ func (f *file) BlockAtPosition(filePos filesystem.FilePosition) (*hcllib.Block, return b, pos, nil } -func (f *file) Diff(target []byte) (filesystem.FileChanges, error) { - var changes filesystem.FileChanges - - ast, _ := f.ast() - body, ok := ast.Body.(*hclsyntax.Body) - if !ok { - return nil, fmt.Errorf("invalid configuration format: %T", ast.Body) - } - - changes = append(changes, &fileChange{ - newText: target, - rng: body.Range(), - }) - - return changes, nil -} - -type fileChange struct { - newText []byte - rng hcl.Range -} - -func (ch *fileChange) Text() string { - return string(ch.newText) -} - -func (ch *fileChange) Range() hcl.Range { - return ch.rng -} - func (f *file) blockAtPosition(pos hcllib.Pos) (*hcllib.Block, error) { ast, _ := f.ast() diff --git a/internal/hcl/hcl_test.go b/internal/hcl/hcl_test.go index d4d4e75f5..c47c8d4be 100644 --- a/internal/hcl/hcl_test.go +++ b/internal/hcl/hcl_test.go @@ -150,50 +150,6 @@ func TestFile_BlockAtPosition(t *testing.T) { } } -func TestFile_diff(t *testing.T) { - targetCfg := []byte(` -provider "aws" { - first = "test" - very_long_attr = "xyz" -}`) - fsFile := filesystem.NewFile("/tmp/test.tf", []byte(` -provider "aws" { - first = "test" - very_long_attr = "xyz" -}`)) - f := NewFile(fsFile) - changes, err := f.Diff(targetCfg) - if err != nil { - t.Fatal(err) - } - - var expectedChanges filesystem.FileChanges - expectedChanges = append(expectedChanges, &fileChange{ - newText: []byte(targetCfg), - rng: hcl.Range{ - Filename: "test.tf", - Start: hcl.Pos{ - Column: 1, - Line: 1, - Byte: 0, - }, - End: hcl.Pos{ - Column: 2, - Line: 5, - Byte: 62, - }, - }, - }) - - opts := cmp.Options{ - cmp.AllowUnexported(fileChange{}), - } - - if diff := cmp.Diff(expectedChanges, changes, opts...); diff != "" { - t.Fatalf("Changes don't match: %s", diff) - } -} - type testPosition struct { filesystem.FileHandler pos hcl.Pos diff --git a/internal/lsp/position.go b/internal/lsp/position.go index a049aae46..c750bd1a3 100644 --- a/internal/lsp/position.go +++ b/internal/lsp/position.go @@ -63,11 +63,10 @@ func byteForLSPColumn(l source.Line, lspCol int) int { return l.Range().Start.Byte } - // Easy path: if the entire line is ASCII then column counts are equivalent - // in LSP vs. HCL aside from zero- vs. one-based counting. 
- if l.IsAllASCII() { - return l.Range().Start.Byte + lspCol - } + // Normally ASCII-only lines could be short-circuited here + // but it's not as easy to tell whether a line is ASCII-only + // based on column/byte differences as we also scan newlines + // and a single line range technically spans 2 lines. // If there are non-ASCII characters then we need to edge carefully // along the line while counting UTF-16 code units in our UTF-8 buffer, diff --git a/internal/source/source.go b/internal/source/source.go index 1524902c6..e77620224 100644 --- a/internal/source/source.go +++ b/internal/source/source.go @@ -1,7 +1,7 @@ package source import ( - "bufio" + "bytes" "github.com/hashicorp/hcl/v2" ) @@ -11,20 +11,14 @@ type sourceLine struct { rng hcl.Range } -// IsAllASCII returns true if the receiver is provably all ASCII, which allows -// for some fast paths where we can treat columns and bytes as equivalent. -func (l sourceLine) IsAllASCII() bool { - // If we have the same number of columns as bytes then our content is - // all ASCII, since it clearly contains no multi-byte grapheme clusters. - bytes := l.rng.End.Byte - l.rng.Start.Byte - columns := l.rng.End.Column - l.rng.Start.Column - return bytes == columns -} - +// Range returns range of the line bytes inc. any trailing end-of-line markers +// The range will span across two lines in most cases +// (other than last line without trailing new line) func (l sourceLine) Range() hcl.Range { return l.rng } +// Bytes returns the line byte inc. any trailing end-of-line markers func (l sourceLine) Bytes() []byte { return l.content } @@ -35,7 +29,7 @@ func MakeSourceLines(filename string, s []byte) []Line { return ret } - sc := hcl.NewRangeScanner(s, filename, bufio.ScanLines) + sc := hcl.NewRangeScanner(s, filename, scanLines) for sc.Scan() { ret = append(ret, sourceLine{ content: sc.Bytes(), @@ -45,3 +39,31 @@ func MakeSourceLines(filename string, s []byte) []Line { return ret } + +// scanLines is a split function for a Scanner that returns each line of +// text (separated by \n), INCLUDING any trailing end-of-line marker. +// The last non-empty line of input will be returned even if it has no +// newline. +func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + // We have a full newline-terminated line. + return i + 1, data[0 : i+1], nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), data, nil + } + // Request more data. 
+ return 0, nil, nil +} + +func StringLines(lines Lines) []string { + strLines := make([]string, len(lines)) + for i, l := range lines { + strLines[i] = string(l.Bytes()) + } + return strLines +} diff --git a/internal/source/source_test.go b/internal/source/source_test.go index e07687021..0db403bed 100644 --- a/internal/source/source_test.go +++ b/internal/source/source_test.go @@ -20,20 +20,3 @@ func TestMakeSourceLines_success(t *testing.T) { expectedLines, len(lines)) } } - -func TestSourceLine_isAllASCII(t *testing.T) { - lines := MakeSourceLines("/test.tf", []byte(`plaintext -smiley 🙃 here`)) - if len(lines) != 2 { - t.Fatal("Expected exactly 2 lines") - } - - if !lines[0].IsAllASCII() { - t.Fatalf("Expected first line (%q) to be reported as ASCII", - string(lines[0].Bytes())) - } - if lines[1].IsAllASCII() { - t.Fatalf("Expected second line (%q) NOT to be reported as ASCII", - string(lines[1].Bytes())) - } -} diff --git a/internal/source/types.go b/internal/source/types.go index 9df4565a5..98d7edda8 100644 --- a/internal/source/types.go +++ b/internal/source/types.go @@ -9,5 +9,4 @@ type Lines []Line type Line interface { Range() hcl.Range Bytes() []byte - IsAllASCII() bool } diff --git a/langserver/handlers/formatting.go b/langserver/handlers/formatting.go index a3419b148..c9e9423c0 100644 --- a/langserver/handlers/formatting.go +++ b/langserver/handlers/formatting.go @@ -31,16 +31,12 @@ func (h *logHandler) TextDocumentFormatting(ctx context.Context, params lsp.Docu return edits, err } - output, err := tf.Format(file.Text()) + formatted, err := tf.Format(file.Text()) if err != nil { return edits, err } - f := hcl.NewFile(file) - changes, err := f.Diff(output) - if err != nil { - return edits, err - } + changes := hcl.Diff(file, formatted) return ilsp.TextEdits(changes), nil } diff --git a/langserver/handlers/formatting_test.go b/langserver/handlers/formatting_test.go index 63a8b2be8..f1bd5fc8e 100644 --- a/langserver/handlers/formatting_test.go +++ b/langserver/handlers/formatting_test.go @@ -69,9 +69,9 @@ func TestLangServer_formatting(t *testing.T) { { "range": { "start": { "line": 0, "character": 0 }, - "end": { "line": 3, "character": 0 } + "end": { "line": 1, "character": 0 } }, - "newText": "provider \"test\" {\n\n}\n" + "newText": "provider \"test\" {\n" } ] }`) diff --git a/vendor/github.com/pmezard/go-difflib/LICENSE b/vendor/github.com/pmezard/go-difflib/LICENSE new file mode 100644 index 000000000..c67dad612 --- /dev/null +++ b/vendor/github.com/pmezard/go-difflib/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2013, Patrick Mezard +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + The names of its contributors may not be used to endorse or promote +products derived from this software without specific prior written +permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/pmezard/go-difflib/difflib/difflib.go b/vendor/github.com/pmezard/go-difflib/difflib/difflib.go new file mode 100644 index 000000000..003e99fad --- /dev/null +++ b/vendor/github.com/pmezard/go-difflib/difflib/difflib.go @@ -0,0 +1,772 @@ +// Package difflib is a partial port of Python difflib module. +// +// It provides tools to compare sequences of strings and generate textual diffs. +// +// The following class and functions have been ported: +// +// - SequenceMatcher +// +// - unified_diff +// +// - context_diff +// +// Getting unified diffs was the main goal of the port. Keep in mind this code +// is mostly suitable to output text differences in a human friendly way, there +// are no guarantees generated diffs are consumable by patch(1). +package difflib + +import ( + "bufio" + "bytes" + "fmt" + "io" + "strings" +) + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} + +func calculateRatio(matches, length int) float64 { + if length > 0 { + return 2.0 * float64(matches) / float64(length) + } + return 1.0 +} + +type Match struct { + A int + B int + Size int +} + +type OpCode struct { + Tag byte + I1 int + I2 int + J1 int + J2 int +} + +// SequenceMatcher compares sequence of strings. The basic +// algorithm predates, and is a little fancier than, an algorithm +// published in the late 1980's by Ratcliff and Obershelp under the +// hyperbolic name "gestalt pattern matching". The basic idea is to find +// the longest contiguous matching subsequence that contains no "junk" +// elements (R-O doesn't address junk). The same idea is then applied +// recursively to the pieces of the sequences to the left and to the right +// of the matching subsequence. This does not yield minimal edit +// sequences, but does tend to yield matches that "look right" to people. +// +// SequenceMatcher tries to compute a "human-friendly diff" between two +// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the +// longest *contiguous* & junk-free matching subsequence. That's what +// catches peoples' eyes. The Windows(tm) windiff has another interesting +// notion, pairing up elements that appear uniquely in each sequence. +// That, and the method here, appear to yield more intuitive difference +// reports than does diff. This method appears to be the least vulnerable +// to synching up on blocks of "junk lines", though (like blank lines in +// ordinary text files, or maybe "
<P>
" lines in HTML files). That may be +// because this is the only method of the 3 that has a *concept* of +// "junk" . +// +// Timing: Basic R-O is cubic time worst case and quadratic time expected +// case. SequenceMatcher is quadratic time for the worst case and has +// expected-case behavior dependent in a complicated way on how many +// elements the sequences have in common; best case time is linear. +type SequenceMatcher struct { + a []string + b []string + b2j map[string][]int + IsJunk func(string) bool + autoJunk bool + bJunk map[string]struct{} + matchingBlocks []Match + fullBCount map[string]int + bPopular map[string]struct{} + opCodes []OpCode +} + +func NewMatcher(a, b []string) *SequenceMatcher { + m := SequenceMatcher{autoJunk: true} + m.SetSeqs(a, b) + return &m +} + +func NewMatcherWithJunk(a, b []string, autoJunk bool, + isJunk func(string) bool) *SequenceMatcher { + + m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk} + m.SetSeqs(a, b) + return &m +} + +// Set two sequences to be compared. +func (m *SequenceMatcher) SetSeqs(a, b []string) { + m.SetSeq1(a) + m.SetSeq2(b) +} + +// Set the first sequence to be compared. The second sequence to be compared is +// not changed. +// +// SequenceMatcher computes and caches detailed information about the second +// sequence, so if you want to compare one sequence S against many sequences, +// use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other +// sequences. +// +// See also SetSeqs() and SetSeq2(). +func (m *SequenceMatcher) SetSeq1(a []string) { + if &a == &m.a { + return + } + m.a = a + m.matchingBlocks = nil + m.opCodes = nil +} + +// Set the second sequence to be compared. The first sequence to be compared is +// not changed. +func (m *SequenceMatcher) SetSeq2(b []string) { + if &b == &m.b { + return + } + m.b = b + m.matchingBlocks = nil + m.opCodes = nil + m.fullBCount = nil + m.chainB() +} + +func (m *SequenceMatcher) chainB() { + // Populate line -> index mapping + b2j := map[string][]int{} + for i, s := range m.b { + indices := b2j[s] + indices = append(indices, i) + b2j[s] = indices + } + + // Purge junk elements + m.bJunk = map[string]struct{}{} + if m.IsJunk != nil { + junk := m.bJunk + for s, _ := range b2j { + if m.IsJunk(s) { + junk[s] = struct{}{} + } + } + for s, _ := range junk { + delete(b2j, s) + } + } + + // Purge remaining popular elements + popular := map[string]struct{}{} + n := len(m.b) + if m.autoJunk && n >= 200 { + ntest := n/100 + 1 + for s, indices := range b2j { + if len(indices) > ntest { + popular[s] = struct{}{} + } + } + for s, _ := range popular { + delete(b2j, s) + } + } + m.bPopular = popular + m.b2j = b2j +} + +func (m *SequenceMatcher) isBJunk(s string) bool { + _, ok := m.bJunk[s] + return ok +} + +// Find longest matching block in a[alo:ahi] and b[blo:bhi]. +// +// If IsJunk is not defined: +// +// Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where +// alo <= i <= i+k <= ahi +// blo <= j <= j+k <= bhi +// and for all (i',j',k') meeting those conditions, +// k >= k' +// i <= i' +// and if i == i', j <= j' +// +// In other words, of all maximal matching blocks, return one that +// starts earliest in a, and of all those maximal matching blocks that +// start earliest in a, return the one that starts earliest in b. +// +// If IsJunk is defined, first the longest matching block is +// determined as above, but with the additional restriction that no +// junk element appears in the block. 
Then that block is extended as +// far as possible by matching (only) junk elements on both sides. So +// the resulting block never matches on junk except as identical junk +// happens to be adjacent to an "interesting" match. +// +// If no blocks match, return (alo, blo, 0). +func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match { + // CAUTION: stripping common prefix or suffix would be incorrect. + // E.g., + // ab + // acab + // Longest matching block is "ab", but if common prefix is + // stripped, it's "a" (tied with "b"). UNIX(tm) diff does so + // strip, so ends up claiming that ab is changed to acab by + // inserting "ca" in the middle. That's minimal but unintuitive: + // "it's obvious" that someone inserted "ac" at the front. + // Windiff ends up at the same place as diff, but by pairing up + // the unique 'b's and then matching the first two 'a's. + besti, bestj, bestsize := alo, blo, 0 + + // find longest junk-free match + // during an iteration of the loop, j2len[j] = length of longest + // junk-free match ending with a[i-1] and b[j] + j2len := map[int]int{} + for i := alo; i != ahi; i++ { + // look at all instances of a[i] in b; note that because + // b2j has no junk keys, the loop is skipped if a[i] is junk + newj2len := map[int]int{} + for _, j := range m.b2j[m.a[i]] { + // a[i] matches b[j] + if j < blo { + continue + } + if j >= bhi { + break + } + k := j2len[j-1] + 1 + newj2len[j] = k + if k > bestsize { + besti, bestj, bestsize = i-k+1, j-k+1, k + } + } + j2len = newj2len + } + + // Extend the best by non-junk elements on each end. In particular, + // "popular" non-junk elements aren't in b2j, which greatly speeds + // the inner loop above, but also means "the best" match so far + // doesn't contain any junk *or* popular non-junk elements. + for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) && + m.a[besti-1] == m.b[bestj-1] { + besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 + } + for besti+bestsize < ahi && bestj+bestsize < bhi && + !m.isBJunk(m.b[bestj+bestsize]) && + m.a[besti+bestsize] == m.b[bestj+bestsize] { + bestsize += 1 + } + + // Now that we have a wholly interesting match (albeit possibly + // empty!), we may as well suck up the matching junk on each + // side of it too. Can't think of a good reason not to, and it + // saves post-processing the (possibly considerable) expense of + // figuring out what to do with it. In the case of an empty + // interesting match, this is clearly the right thing to do, + // because no other kind of match is possible in the regions. + for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) && + m.a[besti-1] == m.b[bestj-1] { + besti, bestj, bestsize = besti-1, bestj-1, bestsize+1 + } + for besti+bestsize < ahi && bestj+bestsize < bhi && + m.isBJunk(m.b[bestj+bestsize]) && + m.a[besti+bestsize] == m.b[bestj+bestsize] { + bestsize += 1 + } + + return Match{A: besti, B: bestj, Size: bestsize} +} + +// Return list of triples describing matching subsequences. +// +// Each triple is of the form (i, j, n), and means that +// a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in +// i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are +// adjacent triples in the list, and the second is not the last triple in the +// list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe +// adjacent equal blocks. +// +// The last triple is a dummy, (len(a), len(b), 0), and is the only +// triple with n==0. 
+func (m *SequenceMatcher) GetMatchingBlocks() []Match { + if m.matchingBlocks != nil { + return m.matchingBlocks + } + + var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match + matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match { + match := m.findLongestMatch(alo, ahi, blo, bhi) + i, j, k := match.A, match.B, match.Size + if match.Size > 0 { + if alo < i && blo < j { + matched = matchBlocks(alo, i, blo, j, matched) + } + matched = append(matched, match) + if i+k < ahi && j+k < bhi { + matched = matchBlocks(i+k, ahi, j+k, bhi, matched) + } + } + return matched + } + matched := matchBlocks(0, len(m.a), 0, len(m.b), nil) + + // It's possible that we have adjacent equal blocks in the + // matching_blocks list now. + nonAdjacent := []Match{} + i1, j1, k1 := 0, 0, 0 + for _, b := range matched { + // Is this block adjacent to i1, j1, k1? + i2, j2, k2 := b.A, b.B, b.Size + if i1+k1 == i2 && j1+k1 == j2 { + // Yes, so collapse them -- this just increases the length of + // the first block by the length of the second, and the first + // block so lengthened remains the block to compare against. + k1 += k2 + } else { + // Not adjacent. Remember the first block (k1==0 means it's + // the dummy we started with), and make the second block the + // new block to compare against. + if k1 > 0 { + nonAdjacent = append(nonAdjacent, Match{i1, j1, k1}) + } + i1, j1, k1 = i2, j2, k2 + } + } + if k1 > 0 { + nonAdjacent = append(nonAdjacent, Match{i1, j1, k1}) + } + + nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0}) + m.matchingBlocks = nonAdjacent + return m.matchingBlocks +} + +// Return list of 5-tuples describing how to turn a into b. +// +// Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple +// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the +// tuple preceding it, and likewise for j1 == the previous j2. +// +// The tags are characters, with these meanings: +// +// 'r' (replace): a[i1:i2] should be replaced by b[j1:j2] +// +// 'd' (delete): a[i1:i2] should be deleted, j1==j2 in this case. +// +// 'i' (insert): b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case. +// +// 'e' (equal): a[i1:i2] == b[j1:j2] +func (m *SequenceMatcher) GetOpCodes() []OpCode { + if m.opCodes != nil { + return m.opCodes + } + i, j := 0, 0 + matching := m.GetMatchingBlocks() + opCodes := make([]OpCode, 0, len(matching)) + for _, m := range matching { + // invariant: we've pumped out correct diffs to change + // a[:i] into b[:j], and the next matching block is + // a[ai:ai+size] == b[bj:bj+size]. So we need to pump + // out a diff to change a[i:ai] into b[j:bj], pump out + // the matching block, and move (i,j) beyond the match + ai, bj, size := m.A, m.B, m.Size + tag := byte(0) + if i < ai && j < bj { + tag = 'r' + } else if i < ai { + tag = 'd' + } else if j < bj { + tag = 'i' + } + if tag > 0 { + opCodes = append(opCodes, OpCode{tag, i, ai, j, bj}) + } + i, j = ai+size, bj+size + // the list of matching blocks is terminated by a + // sentinel with size 0 + if size > 0 { + opCodes = append(opCodes, OpCode{'e', ai, i, bj, j}) + } + } + m.opCodes = opCodes + return m.opCodes +} + +// Isolate change clusters by eliminating ranges with no changes. +// +// Return a generator of groups with up to n lines of context. +// Each group is in the same format as returned by GetOpCodes(). 
+func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode { + if n < 0 { + n = 3 + } + codes := m.GetOpCodes() + if len(codes) == 0 { + codes = []OpCode{OpCode{'e', 0, 1, 0, 1}} + } + // Fixup leading and trailing groups if they show no changes. + if codes[0].Tag == 'e' { + c := codes[0] + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2} + } + if codes[len(codes)-1].Tag == 'e' { + c := codes[len(codes)-1] + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)} + } + nn := n + n + groups := [][]OpCode{} + group := []OpCode{} + for _, c := range codes { + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + // End the current group and start a new one whenever + // there is a large range with no changes. + if c.Tag == 'e' && i2-i1 > nn { + group = append(group, OpCode{c.Tag, i1, min(i2, i1+n), + j1, min(j2, j1+n)}) + groups = append(groups, group) + group = []OpCode{} + i1, j1 = max(i1, i2-n), max(j1, j2-n) + } + group = append(group, OpCode{c.Tag, i1, i2, j1, j2}) + } + if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') { + groups = append(groups, group) + } + return groups +} + +// Return a measure of the sequences' similarity (float in [0,1]). +// +// Where T is the total number of elements in both sequences, and +// M is the number of matches, this is 2.0*M / T. +// Note that this is 1 if the sequences are identical, and 0 if +// they have nothing in common. +// +// .Ratio() is expensive to compute if you haven't already computed +// .GetMatchingBlocks() or .GetOpCodes(), in which case you may +// want to try .QuickRatio() or .RealQuickRation() first to get an +// upper bound. +func (m *SequenceMatcher) Ratio() float64 { + matches := 0 + for _, m := range m.GetMatchingBlocks() { + matches += m.Size + } + return calculateRatio(matches, len(m.a)+len(m.b)) +} + +// Return an upper bound on ratio() relatively quickly. +// +// This isn't defined beyond that it is an upper bound on .Ratio(), and +// is faster to compute. +func (m *SequenceMatcher) QuickRatio() float64 { + // viewing a and b as multisets, set matches to the cardinality + // of their intersection; this counts the number of matches + // without regard to order, so is clearly an upper bound + if m.fullBCount == nil { + m.fullBCount = map[string]int{} + for _, s := range m.b { + m.fullBCount[s] = m.fullBCount[s] + 1 + } + } + + // avail[x] is the number of times x appears in 'b' less the + // number of times we've seen it in 'a' so far ... kinda + avail := map[string]int{} + matches := 0 + for _, s := range m.a { + n, ok := avail[s] + if !ok { + n = m.fullBCount[s] + } + avail[s] = n - 1 + if n > 0 { + matches += 1 + } + } + return calculateRatio(matches, len(m.a)+len(m.b)) +} + +// Return an upper bound on ratio() very quickly. +// +// This isn't defined beyond that it is an upper bound on .Ratio(), and +// is faster to compute than either .Ratio() or .QuickRatio(). 
+func (m *SequenceMatcher) RealQuickRatio() float64 { + la, lb := len(m.a), len(m.b) + return calculateRatio(min(la, lb), la+lb) +} + +// Convert range to the "ed" format +func formatRangeUnified(start, stop int) string { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning := start + 1 // lines start numbering with one + length := stop - start + if length == 1 { + return fmt.Sprintf("%d", beginning) + } + if length == 0 { + beginning -= 1 // empty ranges begin at line just before the range + } + return fmt.Sprintf("%d,%d", beginning, length) +} + +// Unified diff parameters +type UnifiedDiff struct { + A []string // First sequence lines + FromFile string // First file name + FromDate string // First file time + B []string // Second sequence lines + ToFile string // Second file name + ToDate string // Second file time + Eol string // Headers end of line, defaults to LF + Context int // Number of context lines +} + +// Compare two sequences of lines; generate the delta as a unified diff. +// +// Unified diffs are a compact way of showing line changes and a few +// lines of context. The number of context lines is set by 'n' which +// defaults to three. +// +// By default, the diff control lines (those with ---, +++, or @@) are +// created with a trailing newline. This is helpful so that inputs +// created from file.readlines() result in diffs that are suitable for +// file.writelines() since both the inputs and outputs have trailing +// newlines. +// +// For inputs that do not have trailing newlines, set the lineterm +// argument to "" so that the output will be uniformly newline free. +// +// The unidiff format normally has a header for filenames and modification +// times. Any or all of these may be specified using strings for +// 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. +// The modification times are normally expressed in the ISO 8601 format. 
+func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error { + buf := bufio.NewWriter(writer) + defer buf.Flush() + wf := func(format string, args ...interface{}) error { + _, err := buf.WriteString(fmt.Sprintf(format, args...)) + return err + } + ws := func(s string) error { + _, err := buf.WriteString(s) + return err + } + + if len(diff.Eol) == 0 { + diff.Eol = "\n" + } + + started := false + m := NewMatcher(diff.A, diff.B) + for _, g := range m.GetGroupedOpCodes(diff.Context) { + if !started { + started = true + fromDate := "" + if len(diff.FromDate) > 0 { + fromDate = "\t" + diff.FromDate + } + toDate := "" + if len(diff.ToDate) > 0 { + toDate = "\t" + diff.ToDate + } + if diff.FromFile != "" || diff.ToFile != "" { + err := wf("--- %s%s%s", diff.FromFile, fromDate, diff.Eol) + if err != nil { + return err + } + err = wf("+++ %s%s%s", diff.ToFile, toDate, diff.Eol) + if err != nil { + return err + } + } + } + first, last := g[0], g[len(g)-1] + range1 := formatRangeUnified(first.I1, last.I2) + range2 := formatRangeUnified(first.J1, last.J2) + if err := wf("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil { + return err + } + for _, c := range g { + i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2 + if c.Tag == 'e' { + for _, line := range diff.A[i1:i2] { + if err := ws(" " + line); err != nil { + return err + } + } + continue + } + if c.Tag == 'r' || c.Tag == 'd' { + for _, line := range diff.A[i1:i2] { + if err := ws("-" + line); err != nil { + return err + } + } + } + if c.Tag == 'r' || c.Tag == 'i' { + for _, line := range diff.B[j1:j2] { + if err := ws("+" + line); err != nil { + return err + } + } + } + } + } + return nil +} + +// Like WriteUnifiedDiff but returns the diff a string. +func GetUnifiedDiffString(diff UnifiedDiff) (string, error) { + w := &bytes.Buffer{} + err := WriteUnifiedDiff(w, diff) + return string(w.Bytes()), err +} + +// Convert range to the "ed" format. +func formatRangeContext(start, stop int) string { + // Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning := start + 1 // lines start numbering with one + length := stop - start + if length == 0 { + beginning -= 1 // empty ranges begin at line just before the range + } + if length <= 1 { + return fmt.Sprintf("%d", beginning) + } + return fmt.Sprintf("%d,%d", beginning, beginning+length-1) +} + +type ContextDiff UnifiedDiff + +// Compare two sequences of lines; generate the delta as a context diff. +// +// Context diffs are a compact way of showing line changes and a few +// lines of context. The number of context lines is set by diff.Context +// which defaults to three. +// +// By default, the diff control lines (those with *** or ---) are +// created with a trailing newline. +// +// For inputs that do not have trailing newlines, set the diff.Eol +// argument to "" so that the output will be uniformly newline free. +// +// The context diff format normally has a header for filenames and +// modification times. Any or all of these may be specified using +// strings for diff.FromFile, diff.ToFile, diff.FromDate, diff.ToDate. +// The modification times are normally expressed in the ISO 8601 format. +// If not specified, the strings default to blanks. 
+func WriteContextDiff(writer io.Writer, diff ContextDiff) error { + buf := bufio.NewWriter(writer) + defer buf.Flush() + var diffErr error + wf := func(format string, args ...interface{}) { + _, err := buf.WriteString(fmt.Sprintf(format, args...)) + if diffErr == nil && err != nil { + diffErr = err + } + } + ws := func(s string) { + _, err := buf.WriteString(s) + if diffErr == nil && err != nil { + diffErr = err + } + } + + if len(diff.Eol) == 0 { + diff.Eol = "\n" + } + + prefix := map[byte]string{ + 'i': "+ ", + 'd': "- ", + 'r': "! ", + 'e': " ", + } + + started := false + m := NewMatcher(diff.A, diff.B) + for _, g := range m.GetGroupedOpCodes(diff.Context) { + if !started { + started = true + fromDate := "" + if len(diff.FromDate) > 0 { + fromDate = "\t" + diff.FromDate + } + toDate := "" + if len(diff.ToDate) > 0 { + toDate = "\t" + diff.ToDate + } + if diff.FromFile != "" || diff.ToFile != "" { + wf("*** %s%s%s", diff.FromFile, fromDate, diff.Eol) + wf("--- %s%s%s", diff.ToFile, toDate, diff.Eol) + } + } + + first, last := g[0], g[len(g)-1] + ws("***************" + diff.Eol) + + range1 := formatRangeContext(first.I1, last.I2) + wf("*** %s ****%s", range1, diff.Eol) + for _, c := range g { + if c.Tag == 'r' || c.Tag == 'd' { + for _, cc := range g { + if cc.Tag == 'i' { + continue + } + for _, line := range diff.A[cc.I1:cc.I2] { + ws(prefix[cc.Tag] + line) + } + } + break + } + } + + range2 := formatRangeContext(first.J1, last.J2) + wf("--- %s ----%s", range2, diff.Eol) + for _, c := range g { + if c.Tag == 'r' || c.Tag == 'i' { + for _, cc := range g { + if cc.Tag == 'd' { + continue + } + for _, line := range diff.B[cc.J1:cc.J2] { + ws(prefix[cc.Tag] + line) + } + } + break + } + } + } + return diffErr +} + +// Like WriteContextDiff but returns the diff a string. +func GetContextDiffString(diff ContextDiff) (string, error) { + w := &bytes.Buffer{} + err := WriteContextDiff(w, diff) + return string(w.Bytes()), err +} + +// Split a string on "\n" while preserving them. The output can be used +// as input for UnifiedDiff and ContextDiff structures. +func SplitLines(s string) []string { + lines := strings.SplitAfter(s, "\n") + lines[len(lines)-1] += "\n" + return lines +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 1258e0b8e..94bfcfbe1 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -47,6 +47,8 @@ github.com/mattn/go-isatty github.com/mitchellh/cli # github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 github.com/mitchellh/go-wordwrap +# github.com/pmezard/go-difflib v1.0.0 +github.com/pmezard/go-difflib/difflib # github.com/posener/complete v1.1.1 github.com/posener/complete github.com/posener/complete/cmd