@@ -13,6 +13,7 @@ import (
1313 "fmt"
1414 "go/ast"
1515 "go/parser"
16+ "go/scanner"
1617 "go/token"
1718 "internal/coverage"
1819 "internal/coverage/encodemeta"
@@ -274,86 +275,10 @@ type BlockSegment struct {
274275 hasCode bool // true if this segment contains executable code
275276}
276277
277- // blockSplitter holds the state for splitting a block by comments.
278- // It tracks cursor position, line state, and section boundaries
279- // while scanning through source code character by character.
280- type blockSplitter struct {
281- // Source information
282- file * token.File
283- startOffset int
284-
285- // Accumulated results
286- segments []BlockSegment
287-
288- // Cursor position
289- i int
290- indexStartCurrentLine int
291- currentOffset int
292- currentSegmentStart token.Pos
293-
294- // Section state (persists across lines)
295- inCommentedSection bool
296-
297- // Line state (reset each line)
298- lineHasCode bool
299- lineHasComment bool
300-
301- // Cursor state (tracks what we're inside)
302- inSingleLineComment bool
303- inMultiLineComment bool
304- inString bool
305- inRawString bool
306- }
307-
308- // processLine handles end-of-line processing: computes state transitions
309- // and decides whether to create a new segment based on code/comment boundaries.
310- func (bs * blockSplitter ) processLine () {
311- lineStart := bs .currentOffset
312- lineEnd := bs .currentOffset + (bs .i - bs .indexStartCurrentLine )
313-
314- if bs .inCommentedSection && bs .lineHasCode {
315- // End of commented section, start of code section
316- segmentEnd := bs .file .Pos (lineStart )
317- bs .segments = append (bs .segments , BlockSegment {
318- start : bs .currentSegmentStart ,
319- end : segmentEnd ,
320- hasCode : false ,
321- })
322- bs .currentSegmentStart = bs .file .Pos (lineStart )
323- bs .inCommentedSection = false
324- } else if ! bs .inCommentedSection && ! bs .lineHasCode && bs .lineHasComment {
325- // End of code section, start of commented section
326- segmentEnd := bs .file .Pos (lineStart )
327- if bs .currentSegmentStart < segmentEnd {
328- bs .segments = append (bs .segments , BlockSegment {
329- start : bs .currentSegmentStart ,
330- end : segmentEnd ,
331- hasCode : true ,
332- })
333- }
334- bs .currentSegmentStart = bs .file .Pos (lineStart )
335- bs .inCommentedSection = true
336- }
337-
338- bs .currentOffset = lineEnd
339- }
340-
341- // resetLineState resets line-specific state for a new line.
342- func (bs * blockSplitter ) resetLineState () {
343- bs .indexStartCurrentLine = bs .i
344- bs .lineHasComment = bs .inMultiLineComment
345- bs .lineHasCode = false
346- bs .inSingleLineComment = false
347- }
348-
349- // inStringOrComment returns true if currently inside a string or comment.
350- func (bs * blockSplitter ) inStringOrComment () bool {
351- return bs .inString || bs .inRawString || bs .inSingleLineComment || bs .inMultiLineComment
352- }
353-
354278// splitBlockByComments analyzes a block range and splits it into segments,
355279// separating executable code from any commented lines.
356- // To do this, it reads character by character the original source code.
280+ // It uses go/scanner to tokenize the source and identify which lines
281+ // contain executable code vs. only comments.
357282func (f * File ) splitBlockByComments (start , end token.Pos ) []BlockSegment {
358283 startOffset := f .offset (start )
359284 endOffset := f .offset (end )
@@ -362,99 +287,101 @@ func (f *File) splitBlockByComments(start, end token.Pos) []BlockSegment {
362287 return []BlockSegment {{start : start , end : end , hasCode : true }}
363288 }
364289
365- originalSourceCode := f .content [startOffset :endOffset ]
290+ src := f .content [startOffset :endOffset ]
291+ origFile := f .fset .File (start )
366292
367- bs := & blockSplitter {
368- file : f .fset .File (start ),
369- startOffset : startOffset ,
370- currentOffset : startOffset ,
371- currentSegmentStart : start ,
372- }
293+ // Create a new file set for scanning this block
294+ fset := token .NewFileSet ()
295+ file := fset .AddFile ("" , - 1 , len (src ))
373296
374- for bs . i < len ( originalSourceCode ) {
375- char := originalSourceCode [ bs . i ]
297+ var s scanner. Scanner
298+ s . Init ( file , src , nil , scanner . ScanComments )
376299
377- if char == '\\' && bs .inString && bs .i + 1 < len (originalSourceCode ) {
378- bs .lineHasCode = true
379- bs .i += 2 // Skip escaped character
380- continue
381- }
300+ // Track which lines have code vs only comments
301+ lineHasCode := make (map [int ]bool )
302+ lineHasComment := make (map [int ]bool )
382303
383- if char == '"' && ! bs .inRawString && ! bs .inSingleLineComment && ! bs .inMultiLineComment {
384- bs .lineHasCode = true
385- bs .inString = ! bs .inString
386- bs .i ++
387- continue
304+ for {
305+ pos , tok , lit := s .Scan ()
306+ if tok == token .EOF {
307+ break
388308 }
389309
390- if char == '`' && ! bs .inString && ! bs .inSingleLineComment && ! bs .inMultiLineComment {
391- bs .lineHasCode = true
392- bs .inRawString = ! bs .inRawString
393- bs .i ++
394- continue
310+ // Find the first and last line the token occupies. As with ast.Comment.End(),
311+ // the token ends at pos+len(lit), so pos+len(lit)-1 is its last byte; a token with an empty literal falls back to startLine below.
312+ startLine := file .Line (pos )
313+ endLine := file .Line (pos + token .Pos (len (lit )) - 1 )
314+ if endLine < startLine {
315+ endLine = startLine
395316 }
396317
397- if char == '\n' {
398- bs .i ++
399- bs .processLine ()
400- bs .resetLineState ()
401- continue
402- }
403-
404- if bs .i + 1 < len (originalSourceCode ) {
405- nextChar := originalSourceCode [bs .i + 1 ]
406- if char == '/' && nextChar == '/' && ! bs .inString && ! bs .inRawString && ! bs .inMultiLineComment {
407- bs .inSingleLineComment = true
408- bs .lineHasComment = true
409- bs .i += 2
410- continue
411- }
412-
413- if char == '/' && nextChar == '*' && ! bs .inString && ! bs .inRawString && ! bs .inSingleLineComment {
414- bs .inMultiLineComment = true
415- bs .lineHasComment = true
416- bs .i += 2
417- continue
318+ if tok == token .COMMENT {
319+ // Mark all lines spanned by this comment
320+ for line := startLine ; line <= endLine ; line ++ {
321+ lineHasComment [line ] = true
418322 }
419-
420- if char == '*' && nextChar == '/' && bs .inMultiLineComment {
421- bs .inMultiLineComment = false
422- bs .lineHasComment = true
423- bs .i += 2
424- continue
323+ } else {
324+ // Mark all lines spanned by this token as having code
325+ for line := startLine ; line <= endLine ; line ++ {
326+ lineHasCode [line ] = true
425327 }
426328 }
329+ }
427330
428- // If we matched nothing else and the char is not a whitespace, we are in normal code.
429- if ! bs .lineHasCode && ! isWhitespace (char ) && ! bs .inSingleLineComment && ! bs .inMultiLineComment {
430- bs .lineHasCode = true
331+ // Build segments based on line transitions
332+ // The scanner has already built the line table in file, so we can use
333+ // file.LineStart() to get positions directly instead of manual calculation.
334+ var segments []BlockSegment
335+ var currentSegmentStart token.Pos = start
336+ inCommentedSection := false
337+
338+ totalLines := file .LineCount ()
339+ for line := 1 ; line <= totalLines ; line ++ {
340+ hasCode := lineHasCode [line ]
341+ hasComment := lineHasComment [line ]
342+
343+ if inCommentedSection && hasCode {
344+ // End of commented section, start of code section
345+ lineOffset := file .Offset (file .LineStart (line ))
346+ segmentEnd := origFile .Pos (startOffset + lineOffset )
347+ segments = append (segments , BlockSegment {
348+ start : currentSegmentStart ,
349+ end : segmentEnd ,
350+ hasCode : false ,
351+ })
352+ currentSegmentStart = segmentEnd
353+ inCommentedSection = false
354+ } else if ! inCommentedSection && ! hasCode && hasComment {
355+ // End of code section, start of commented section
356+ lineOffset := file .Offset (file .LineStart (line ))
357+ segmentEnd := origFile .Pos (startOffset + lineOffset )
358+ if currentSegmentStart < segmentEnd {
359+ segments = append (segments , BlockSegment {
360+ start : currentSegmentStart ,
361+ end : segmentEnd ,
362+ hasCode : true ,
363+ })
364+ }
365+ currentSegmentStart = segmentEnd
366+ inCommentedSection = true
431367 }
432-
433- bs .i ++
434368 }
435369
436- // Process the last line if it doesn't end with a newline
437- bs .processLine ()
438-
439370 // Add the final segment
440- if bs . currentSegmentStart < end {
441- bs . segments = append (bs . segments , BlockSegment {
442- start : bs . currentSegmentStart ,
371+ if currentSegmentStart < end {
372+ segments = append (segments , BlockSegment {
373+ start : currentSegmentStart ,
443374 end : end ,
444- hasCode : ! bs . inCommentedSection ,
375+ hasCode : ! inCommentedSection ,
445376 })
446377 }
447378
448379 // If no segments were created, return the original block as a code segment
449- if len (bs . segments ) == 0 {
380+ if len (segments ) == 0 {
450381 return []BlockSegment {{start : start , end : end , hasCode : true }}
451382 }
452383
453- return bs .segments
454- }
455-
456- func isWhitespace (b byte ) bool {
457- return b == ' ' || b == '\t' || b == '\n' || b == '\r'
384+ return segments
458385}
459386
460387// findText finds text in the original source, starting at pos.
0 commit comments