Fix parsing of non-extended file headers and lines starting with --

mrnugget · mrnugget · commit 45f0055c4262 · 2020-09-28T14:33:59.000+02:00
This fixes #54 by making the detection of non-extended file headers (which start with `---` directly), introduced in #53, more robust. Instead of simply aborting whenever the current line starts with `---` (which is also a valid hunk line if you, say, remove a line starting with `--`), we peek a bit ahead and only treat the line as a file header when the next line starts with `+++`. That's also what `git` does: https://sourcegraph.com/github.com/git/git/-/blob/apply.c#L1574-1576
diff --git a/diff/diff_test.go b/diff/diff_test.go
@@ -73,6 +73,7 @@ func TestParseHunksAndPrintHunks(t *testing.T) {
 		{filename: "empty_new.diff"},
 		{filename: "oneline_hunk.diff"},
 		{filename: "empty.diff"},
+		{filename: "sample_hunk_lines_start_with_minuses.diff"},
 	}
 	for _, test := range tests {
 		diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename))
diff --git a/diff/parse.go b/diff/parse.go
@@ -61,6 +61,7 @@ func (r *MultiFileDiffReader) ReadFile() (*FileDiff, error) {
 			if e.Err == ErrNoFileHeader || e.Err == ErrExtendedHeadersEOF {
 				return nil, io.EOF
 			}
+			return nil, err
 
 		case OverflowError:
 			r.nextFileFirstLine = []byte(e)
@@ -513,9 +514,22 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) {
 			r.hunk.Section = section
 		} else {
 			// Read hunk body line.
+
+			// If the line starts with `---` and the next one with `+++` we're
+			// looking at a non-extended file header and need to abort.
+			if bytes.HasPrefix(line, []byte("---")) {
+				ok, err := nextLineHasPrefix(r.reader, []byte("+++"))
+				if err != nil {
+					return r.hunk, err
+				}
+				if ok {
+					return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}}
+				}
+			}
+
+			// If the line starts with the hunk prefix, this hunk is complete.
 			if bytes.HasPrefix(line, hunkPrefix) {
-				// Saw start of new hunk, so this hunk is
-				// complete. But we've already read in the next hunk's
+				// But we've already read in the next hunk's
 				// header, so we need to be sure that the next call to
 				// ReadHunk starts with that header.
 				r.nextHunkHeaderLine = line
@@ -527,7 +541,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) {
 				return r.hunk, nil
 			}
 
-			if len(line) >= 1 && (!linePrefix(line[0]) || bytes.HasPrefix(line, []byte("--- "))) {
+			if len(line) >= 1 && !linePrefix(line[0]) {
 				// Bad hunk header line. If we're reading a multi-file
 				// diff, this may be the end of the current
 				// file. Return a "rich" error that lets our caller
@@ -579,6 +593,16 @@ func linePrefix(c byte) bool {
 	return false
 }
 
+// nextLineHasPrefix reports whether the next len(prefix) bytes in reader start
+// with prefix. It only peeks at the reader; a failed Peek yields (false, err).
+func nextLineHasPrefix(reader *bufio.Reader, prefix []byte) (bool, error) {
+	next, err := reader.Peek(len(prefix))
+	if err != nil {
+		return false, err // NOTE(review): presumably includes io.EOF when fewer than len(prefix) bytes remain — confirm callers expect that
+	}
+	return bytes.HasPrefix(next, prefix), nil
+}
+
 // normalizeHeader takes a header of the form:
 // "@@ -linestart[,chunksize] +linestart[,chunksize] @@ section"
 // and returns two strings, with the first in the form:
diff --git a/diff/testdata/sample_hunk_lines_start_with_minuses.diff b/diff/testdata/sample_hunk_lines_start_with_minuses.diff
@@ -0,0 +1,5 @@
+@@ -1,4 +1,3 @@
+ select 1;
+--- this is my query
+ select 2;
+ select 3;

Original file line number	Diff line number	Diff line change
`@@ -73,6 +73,7 @@ func TestParseHunksAndPrintHunks(t *testing.T) {`
`73`	`73`	`{filename: "empty_new.diff"},`
`74`	`74`	`{filename: "oneline_hunk.diff"},`
`75`	`75`	`{filename: "empty.diff"},`
	`76`	`+ {filename: "sample_hunk_lines_start_with_minuses.diff"},`
`76`	`77`	`}`
`77`	`78`	`for _, test := range tests {`
`78`	`79`	`diffData, err := ioutil.ReadFile(filepath.Join("testdata", test.filename))`