From 45f0055c42620a642d46826138880dfc167f0fbe Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Mon, 28 Sep 2020 14:33:59 +0200 Subject: [PATCH 1/3] Fix parsing of non-extended file headers and lines starting with -- This fixes #54 by making the detection of non-extended file headers (which start with `---` directly) that was introduced in #53 more robust. Instead of simply aborting when the current line starts with `---` (which is a valid hunk line, if you, say, remove a line starting with `--`), we confirm that the next line also starts with `+++` by peeking a bit ahead. That's also what `git` does: https://sourcegraph.com/github.com/git/git/-/blob/apply.c#L1574-1576 --- diff/diff_test.go | 1 + diff/parse.go | 30 +++++++++++++++++-- .../sample_hunk_lines_start_with_minuses.diff | 5 ++++ 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 diff/testdata/sample_hunk_lines_start_with_minuses.diff diff --git a/diff/diff_test.go b/diff/diff_test.go index 5ae55b8..50a138f 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -73,6 +73,7 @@ func TestParseHunksAndPrintHunks(t *testing.T) { {filename: "empty_new.diff"}, {filename: "oneline_hunk.diff"}, {filename: "empty.diff"}, + {filename: "sample_hunk_lines_start_with_minuses.diff"}, } for _, test := range tests { diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename)) diff --git a/diff/parse.go b/diff/parse.go index 6743502..f705e14 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -61,6 +61,7 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) { if e.Err == ErrNoFileHeader || e.Err == ErrExtendedHeadersEOF { return nil, io.EOF } + return nil, err case OverflowError: r.nextFileFirstLine = []byte(e) @@ -513,9 +514,22 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { r.hunk.Section = section } else { // Read hunk body line. + + // If the line starts with `---` and the next one with `+++` we're + // looking at a non-extended file header and need to abort. + if bytes.HasPrefix(line, []byte("---")) { + ok, err := nextLineHasPrefix(r.reader, []byte("+++")) + if err != nil { + return r.hunk, err + } + if ok { + return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}} + } + } + + // If the line starts with the hunk prefix, this hunk is complete. if bytes.HasPrefix(line, hunkPrefix) { - // Saw start of new hunk, so this hunk is - // complete. But we've already read in the next hunk's + // But we've already read in the next hunk's // header, so we need to be sure that the next call to // ReadHunk starts with that header. r.nextHunkHeaderLine = line @@ -527,7 +541,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { return r.hunk, nil } - if len(line) >= 1 && (!linePrefix(line[0]) || bytes.HasPrefix(line, []byte("--- "))) { + if len(line) >= 1 && !linePrefix(line[0]) { // Bad hunk header line. If we're reading a multi-file // diff, this may be the end of the current // file. Return a "rich" error that lets our caller @@ -579,6 +593,16 @@ func linePrefix(c byte) bool { return false } +// nextLineHasPrefix peeks into the given reader to check whether the next +// bytes match the given prefix. +func nextLineHasPrefix(reader *bufio.Reader, prefix []byte) (bool, error) { + next, err := reader.Peek(len(prefix)) + if err != nil { + return false, err + } + return bytes.HasPrefix(next, prefix), nil +} + // normalizeHeader takes a header of the form: // "@@ -linestart[,chunksize] +linestart[,chunksize] @@ section" // and returns two strings, with the first in the form: diff --git a/diff/testdata/sample_hunk_lines_start_with_minuses.diff b/diff/testdata/sample_hunk_lines_start_with_minuses.diff new file mode 100644 index 0000000..6540227 --- /dev/null +++ b/diff/testdata/sample_hunk_lines_start_with_minuses.diff @@ -0,0 +1,5 @@ +@@ -1,4 +1,3 @@ + select 1; +--- this is my query + select 2; + select 3; From 5726eda9cccc57d7506917231fb126d8242c4915 Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Mon, 28 Sep 2020 16:01:01 +0200 Subject: [PATCH 2/3] Handle peek beyond last line --- diff/parse.go | 3 +++ diff/testdata/sample_hunk_lines_start_with_minuses.diff | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/diff/parse.go b/diff/parse.go index f705e14..0ea8f1f 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -598,6 +598,9 @@ func linePrefix(c byte) bool { func nextLineHasPrefix(reader *bufio.Reader, prefix []byte) (bool, error) { next, err := reader.Peek(len(prefix)) if err != nil { + if err == io.EOF { + return false, nil + } return false, err } return bytes.HasPrefix(next, prefix), nil diff --git a/diff/testdata/sample_hunk_lines_start_with_minuses.diff b/diff/testdata/sample_hunk_lines_start_with_minuses.diff index 6540227..61664d4 100644 --- a/diff/testdata/sample_hunk_lines_start_with_minuses.diff +++ b/diff/testdata/sample_hunk_lines_start_with_minuses.diff @@ -1,5 +1,6 @@ -@@ -1,4 +1,3 @@ +@@ -1,5 +1,3 @@ select 1; --- this is my query select 2; select 3; +--- this is the last line From 837bd652fa3836f4e23221475c97e59ede936257 Mon Sep 17 00:00:00 2001 From: Thorsten Ball Date: Mon, 28 Sep 2020 16:13:24 +0200 Subject: [PATCH 3/3] Rename nextLineHasPrefix to peekPrefix and change signature --- diff/parse.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/diff/parse.go b/diff/parse.go index 0ea8f1f..8d5cfc2 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -518,7 +518,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { // If the line starts with `---` and the next one with `+++` we're // looking at a non-extended file header and need to abort. if bytes.HasPrefix(line, []byte("---")) { - ok, err := nextLineHasPrefix(r.reader, []byte("+++")) + ok, err := peekPrefix(r.reader, "+++") if err != nil { return r.hunk, err } @@ -593,9 +593,9 @@ func linePrefix(c byte) bool { return false } -// nextLineHasPrefix peeks into the given reader to check whether the next +// peekPrefix peeks into the given reader to check whether the next // bytes match the given prefix. -func nextLineHasPrefix(reader *bufio.Reader, prefix []byte) (bool, error) { +func peekPrefix(reader *bufio.Reader, prefix string) (bool, error) { next, err := reader.Peek(len(prefix)) if err != nil { if err == io.EOF { @@ -603,7 +603,7 @@ func nextLineHasPrefix(reader *bufio.Reader, prefix []byte) (bool, error) { } return false, err } - return bytes.HasPrefix(next, prefix), nil + return bytes.HasPrefix(next, []byte(prefix)), nil } // normalizeHeader takes a header of the form: