Skip to content

Commit f740d24

Browse files
replace manual scanning with go/scanner call
1 parent 801c8ec commit f740d24

File tree

1 file changed

+74
-147
lines changed

1 file changed

+74
-147
lines changed

src/cmd/cover/cover.go

Lines changed: 74 additions & 147 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ import (
1313
"fmt"
1414
"go/ast"
1515
"go/parser"
16+
"go/scanner"
1617
"go/token"
1718
"internal/coverage"
1819
"internal/coverage/encodemeta"
@@ -274,86 +275,10 @@ type BlockSegment struct {
274275
hasCode bool // true if this segment contains executable code
275276
}
276277

277-
// blockSplitter holds the state for splitting a block by comments.
278-
// It tracks cursor position, line state, and section boundaries
279-
// while scanning through source code character by character.
280-
type blockSplitter struct {
281-
// Source information
282-
file *token.File
283-
startOffset int
284-
285-
// Accumulated results
286-
segments []BlockSegment
287-
288-
// Cursor position
289-
i int
290-
indexStartCurrentLine int
291-
currentOffset int
292-
currentSegmentStart token.Pos
293-
294-
// Section state (persists across lines)
295-
inCommentedSection bool
296-
297-
// Line state (reset each line)
298-
lineHasCode bool
299-
lineHasComment bool
300-
301-
// Cursor state (tracks what we're inside)
302-
inSingleLineComment bool
303-
inMultiLineComment bool
304-
inString bool
305-
inRawString bool
306-
}
307-
308-
// processLine handles end-of-line processing: computes state transitions
309-
// and decides whether to create a new segment based on code/comment boundaries.
310-
func (bs *blockSplitter) processLine() {
311-
lineStart := bs.currentOffset
312-
lineEnd := bs.currentOffset + (bs.i - bs.indexStartCurrentLine)
313-
314-
if bs.inCommentedSection && bs.lineHasCode {
315-
// End of commented section, start of code section
316-
segmentEnd := bs.file.Pos(lineStart)
317-
bs.segments = append(bs.segments, BlockSegment{
318-
start: bs.currentSegmentStart,
319-
end: segmentEnd,
320-
hasCode: false,
321-
})
322-
bs.currentSegmentStart = bs.file.Pos(lineStart)
323-
bs.inCommentedSection = false
324-
} else if !bs.inCommentedSection && !bs.lineHasCode && bs.lineHasComment {
325-
// End of code section, start of commented section
326-
segmentEnd := bs.file.Pos(lineStart)
327-
if bs.currentSegmentStart < segmentEnd {
328-
bs.segments = append(bs.segments, BlockSegment{
329-
start: bs.currentSegmentStart,
330-
end: segmentEnd,
331-
hasCode: true,
332-
})
333-
}
334-
bs.currentSegmentStart = bs.file.Pos(lineStart)
335-
bs.inCommentedSection = true
336-
}
337-
338-
bs.currentOffset = lineEnd
339-
}
340-
341-
// resetLineState resets line-specific state for a new line.
342-
func (bs *blockSplitter) resetLineState() {
343-
bs.indexStartCurrentLine = bs.i
344-
bs.lineHasComment = bs.inMultiLineComment
345-
bs.lineHasCode = false
346-
bs.inSingleLineComment = false
347-
}
348-
349-
// inStringOrComment returns true if currently inside a string or comment.
350-
func (bs *blockSplitter) inStringOrComment() bool {
351-
return bs.inString || bs.inRawString || bs.inSingleLineComment || bs.inMultiLineComment
352-
}
353-
354278
// splitBlockByComments analyzes a block range and splits it into segments,
355279
// separating executable code from any commented lines.
356-
// To do this, it reads character by character the original source code.
280+
// It uses go/scanner to tokenize the source and identify which lines
281+
// contain executable code vs. only comments.
357282
func (f *File) splitBlockByComments(start, end token.Pos) []BlockSegment {
358283
startOffset := f.offset(start)
359284
endOffset := f.offset(end)
@@ -362,99 +287,101 @@ func (f *File) splitBlockByComments(start, end token.Pos) []BlockSegment {
362287
return []BlockSegment{{start: start, end: end, hasCode: true}}
363288
}
364289

365-
originalSourceCode := f.content[startOffset:endOffset]
290+
src := f.content[startOffset:endOffset]
291+
origFile := f.fset.File(start)
366292

367-
bs := &blockSplitter{
368-
file: f.fset.File(start),
369-
startOffset: startOffset,
370-
currentOffset: startOffset,
371-
currentSegmentStart: start,
372-
}
293+
// Create a new file set for scanning this block
294+
fset := token.NewFileSet()
295+
file := fset.AddFile("", -1, len(src))
373296

374-
for bs.i < len(originalSourceCode) {
375-
char := originalSourceCode[bs.i]
297+
var s scanner.Scanner
298+
s.Init(file, src, nil, scanner.ScanComments)
376299

377-
if char == '\\' && bs.inString && bs.i+1 < len(originalSourceCode) {
378-
bs.lineHasCode = true
379-
bs.i += 2 // Skip escaped character
380-
continue
381-
}
300+
// Track which lines have code vs only comments
301+
lineHasCode := make(map[int]bool)
302+
lineHasComment := make(map[int]bool)
382303

383-
if char == '"' && !bs.inRawString && !bs.inSingleLineComment && !bs.inMultiLineComment {
384-
bs.lineHasCode = true
385-
bs.inString = !bs.inString
386-
bs.i++
387-
continue
304+
for {
305+
pos, tok, lit := s.Scan()
306+
if tok == token.EOF {
307+
break
388308
}
389309

390-
if char == '`' && !bs.inString && !bs.inSingleLineComment && !bs.inMultiLineComment {
391-
bs.lineHasCode = true
392-
bs.inRawString = !bs.inRawString
393-
bs.i++
394-
continue
310+
// Calculate start and end lines using the standard pattern from go/ast
311+
// (e.g., ast.Comment.End() returns pos + len(text))
312+
startLine := file.Line(pos)
313+
endLine := file.Line(pos + token.Pos(len(lit)) - 1)
314+
if endLine < startLine {
315+
endLine = startLine
395316
}
396317

397-
if char == '\n' {
398-
bs.i++
399-
bs.processLine()
400-
bs.resetLineState()
401-
continue
402-
}
403-
404-
if bs.i+1 < len(originalSourceCode) {
405-
nextChar := originalSourceCode[bs.i+1]
406-
if char == '/' && nextChar == '/' && !bs.inString && !bs.inRawString && !bs.inMultiLineComment {
407-
bs.inSingleLineComment = true
408-
bs.lineHasComment = true
409-
bs.i += 2
410-
continue
411-
}
412-
413-
if char == '/' && nextChar == '*' && !bs.inString && !bs.inRawString && !bs.inSingleLineComment {
414-
bs.inMultiLineComment = true
415-
bs.lineHasComment = true
416-
bs.i += 2
417-
continue
318+
if tok == token.COMMENT {
319+
// Mark all lines spanned by this comment
320+
for line := startLine; line <= endLine; line++ {
321+
lineHasComment[line] = true
418322
}
419-
420-
if char == '*' && nextChar == '/' && bs.inMultiLineComment {
421-
bs.inMultiLineComment = false
422-
bs.lineHasComment = true
423-
bs.i += 2
424-
continue
323+
} else {
324+
// Mark all lines spanned by this token as having code
325+
for line := startLine; line <= endLine; line++ {
326+
lineHasCode[line] = true
425327
}
426328
}
329+
}
427330

428-
// If we matched nothing else and the char is not a whitespace, we are in normal code.
429-
if !bs.lineHasCode && !isWhitespace(char) && !bs.inSingleLineComment && !bs.inMultiLineComment {
430-
bs.lineHasCode = true
331+
// Build segments based on line transitions
332+
// The scanner has already built the line table in file, so we can use
333+
// file.LineStart() to get positions directly instead of manual calculation.
334+
var segments []BlockSegment
335+
var currentSegmentStart token.Pos = start
336+
inCommentedSection := false
337+
338+
totalLines := file.LineCount()
339+
for line := 1; line <= totalLines; line++ {
340+
hasCode := lineHasCode[line]
341+
hasComment := lineHasComment[line]
342+
343+
if inCommentedSection && hasCode {
344+
// End of commented section, start of code section
345+
lineOffset := file.Offset(file.LineStart(line))
346+
segmentEnd := origFile.Pos(startOffset + lineOffset)
347+
segments = append(segments, BlockSegment{
348+
start: currentSegmentStart,
349+
end: segmentEnd,
350+
hasCode: false,
351+
})
352+
currentSegmentStart = segmentEnd
353+
inCommentedSection = false
354+
} else if !inCommentedSection && !hasCode && hasComment {
355+
// End of code section, start of commented section
356+
lineOffset := file.Offset(file.LineStart(line))
357+
segmentEnd := origFile.Pos(startOffset + lineOffset)
358+
if currentSegmentStart < segmentEnd {
359+
segments = append(segments, BlockSegment{
360+
start: currentSegmentStart,
361+
end: segmentEnd,
362+
hasCode: true,
363+
})
364+
}
365+
currentSegmentStart = segmentEnd
366+
inCommentedSection = true
431367
}
432-
433-
bs.i++
434368
}
435369

436-
// Process the last line if it doesn't end with a newline
437-
bs.processLine()
438-
439370
// Add the final segment
440-
if bs.currentSegmentStart < end {
441-
bs.segments = append(bs.segments, BlockSegment{
442-
start: bs.currentSegmentStart,
371+
if currentSegmentStart < end {
372+
segments = append(segments, BlockSegment{
373+
start: currentSegmentStart,
443374
end: end,
444-
hasCode: !bs.inCommentedSection,
375+
hasCode: !inCommentedSection,
445376
})
446377
}
447378

448379
// If no segments were created, return the original block as a code segment
449-
if len(bs.segments) == 0 {
380+
if len(segments) == 0 {
450381
return []BlockSegment{{start: start, end: end, hasCode: true}}
451382
}
452383

453-
return bs.segments
454-
}
455-
456-
func isWhitespace(b byte) bool {
457-
return b == ' ' || b == '\t' || b == '\n' || b == '\r'
384+
return segments
458385
}
459386

460387
// findText finds text in the original source, starting at pos.

0 commit comments

Comments
 (0)