Skip to content

Commit

Permalink
[lcov] Re-implement lcov reports using the same algorithm as XML repo…
Browse files Browse the repository at this point in the history
…rts.

This fixes five serious bugs:

- The first field of a BRDA: line may not be zero (#1846)
- The first field of a BRDA: line is supposed to be the *source* line of a
  branch, not the destination line.
- The fourth field of a BRDA: line is supposed to be “-” when the branch
  was *never reached*, not when it was reached but never taken (which is
  what a branch’s presence in missing_arcs means).  As far as I can tell,
  coverage.py currently doesn’t know of the existence of branches that were
  never reached.

- The decision of whether to emit a DA: line at all is now taken according to
  what’s in analysis.statements, not what’s in analysis.executed and
  analysis.missing; this is important because some lines (e.g. the beginnings
  of docstrings) may appear in analysis.executed but *not* in
  analysis.statements.

- We no longer attempt to call branch-coverage-related Analysis methods
  when analysis.has_arcs is false.

And two minor annoyances:

- DA: and BRDA: lines are now emitted strictly in ascending order by (source)
  line number.
- Source file records are now sorted by *relative* pathname, not absolute
  pathname from the coverage database.
  • Loading branch information
zackw committed Sep 6, 2024
1 parent 7bed122 commit 2815c3c
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 79 deletions.
120 changes: 57 additions & 63 deletions coverage/lcovreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,20 @@ def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float:
self.coverage.get_data()
outfile = outfile or sys.stdout

for fr, analysis in get_analysis_to_report(self.coverage, morfs):
# ensure file records are sorted by the _relative_ filename, not the full path
to_report = [(fr.relative_filename(), fr, analysis)
for fr, analysis in get_analysis_to_report(self.coverage, morfs)]
to_report.sort()

for fname, fr, analysis in to_report:
self.total += analysis.numbers
self.lcov_file(fr, analysis, outfile)
self.lcov_file(fname, fr, analysis, outfile)

return self.total.n_statements and self.total.pc_covered

def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> None:
def lcov_file(self, rel_fname: str,
fr: FileReporter, analysis: Analysis,
outfile: IO[str]) -> None:
"""Produces the lcov data for a single file.
This currently supports both line and branch coverage,
Expand All @@ -86,76 +93,63 @@ def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> N
if self.config.skip_empty:
return

outfile.write(f"SF:{fr.relative_filename()}\n")
if self.checksum_mode == "file":
outfile.write(f"SF:{rel_fname}\n")

source_lines = None
if self.checksum_mode == "line":
source_lines = fr.source().splitlines()
elif self.checksum_mode == "file":
outfile.write(f"VER:{file_hash(fr.source())}\n")

source_lines = fr.source().splitlines()
for covered in sorted(analysis.executed):
if covered in analysis.excluded:
# Do not report excluded as executed
continue

if source_lines:
if covered-1 >= len(source_lines):
break
line = source_lines[covered-1]
else:
line = ""
if self.checksum_mode == "line":
hash_suffix = "," + line_hash(line)
else:
hash_suffix = ""

# Note: Coverage.py currently only supports checking *if* a line
# has been executed, not how many times, so we set this to 1 for
# nice output even if it's technically incorrect.
outfile.write(f"DA:{covered},1{hash_suffix}\n")

for missed in sorted(analysis.missing):
# We don't have to skip excluded lines here, because `missing`
# already doesn't have them.
assert source_lines
line = source_lines[missed-1]
# Emit a DA: record for each line of the file.
lines = sorted(analysis.statements)
hash_suffix = ""
for line in lines:
if self.checksum_mode == "line":
hash_suffix = "," + line_hash(line)
else:
hash_suffix = ""
outfile.write(f"DA:{missed},0{hash_suffix}\n")
hash_suffix = "," + line_hash(source_lines[line-1])
# Q: can we get info about the number of times a statement is
# executed? If so, that should be recorded here.
hit = int(line not in analysis.missing)
outfile.write(f"DA:{line},{hit}{hash_suffix}\n")

if analysis.numbers.n_statements > 0:
outfile.write(f"LF:{analysis.numbers.n_statements}\n")
outfile.write(f"LH:{analysis.numbers.n_executed}\n")

# More information dense branch coverage data.
missing_arcs = analysis.missing_branch_arcs()
executed_arcs = analysis.executed_branch_arcs()
for block_number, block_line_number in enumerate(
sorted(analysis.branch_stats().keys()),
):
for branch_number, line_number in enumerate(
sorted(missing_arcs[block_line_number]),
):
# The exit branches have a negative line number,
# this will not produce valid lcov. Setting
# the line number of the exit branch to 0 will allow
# for valid lcov, while preserving the data.
line_number = max(line_number, 0)
outfile.write(f"BRDA:{line_number},{block_number},{branch_number},-\n")

# The start value below allows for the block number to be
# preserved between these two for loops (stopping the loop from
# resetting the value of the block number to 0).
for branch_number, line_number in enumerate(
sorted(executed_arcs[block_line_number]),
start=len(missing_arcs[block_line_number]),
):
line_number = max(line_number, 0)
outfile.write(f"BRDA:{line_number},{block_number},{branch_number},1\n")

# Summary of the branch coverage.
# More information dense branch coverage data, if available.
if analysis.has_arcs:
branch_stats = analysis.branch_stats()
executed_arcs = analysis.executed_branch_arcs()
missing_arcs = analysis.missing_branch_arcs()

for line in lines:
if line in branch_stats:
# In our data, exit branches have negative destination line numbers.
# The lcov tools will reject these - but the lcov tools consider the
# destinations of branches to be opaque tokens. Use the absolute
# value of the destination line number as the destination block
# number, and its sign as the destination branch number. This will
# ensure destinations are unique and stable, source line numbers are
# always positive, and destination block and branch numbers are always
# nonnegative, which are the properties we need.

# The data we have does not permit us to identify branches that were
# never *reached*, which is what "-" in the hit column means. Such
# branches aren't in either executed_arcs or missing_arcs - we don't
# even know they exist.

# Q: can we get counts of the number of times each arc was executed?
# branch_stats has "total" and "taken" counts but it doesn't have
# "taken" broken down by destination.
arcs = []
arcs.extend((abs(l), int(l <= 0), 1) for l in executed_arcs[line])
arcs.extend((abs(l), int(l <= 0), 0) for l in missing_arcs[line])
arcs.sort()

for block, branch, hit in arcs:
outfile.write(f"BRDA:{line},{block},{branch},{hit}\n")

# Summary of the branch coverage.
brf = sum(t for t, k in branch_stats.values())
brh = brf - sum(t - k for t, k in branch_stats.values())
if brf > 0:
Expand Down
30 changes: 15 additions & 15 deletions tests/test_lcov.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def IsItTrue():
expected_result = textwrap.dedent("""\
SF:main_file.py
DA:1,1
DA:4,1
DA:2,0
DA:4,1
DA:5,0
LF:4
LH:2
Expand Down Expand Up @@ -92,8 +92,8 @@ def IsItTrue():
expected_result = textwrap.dedent("""\
SF:main_file.py
DA:1,1,7URou3io0zReBkk69lEb/Q
DA:4,1,ilhb4KUfytxtEuClijZPlQ
DA:2,0,Xqj6H1iz/nsARMCAbE90ng
DA:4,1,ilhb4KUfytxtEuClijZPlQ
DA:5,0,LWILTcvARcydjFFyo9qM0A
LF:4
LH:2
Expand All @@ -120,8 +120,8 @@ def IsItTrue():
SF:main_file.py
VER:FjGZ0lkufNMCxmG+BA8yvoaqg9xdUOVKi5kpRpUs3c0
DA:1,1
DA:4,1
DA:2,0
DA:4,1
DA:5,0
LF:4
LH:2
Expand All @@ -144,8 +144,8 @@ def test_simple_line_coverage_two_files(self) -> None:
expected_result = textwrap.dedent("""\
SF:main_file.py
DA:1,1
DA:4,1
DA:2,0
DA:4,1
DA:5,0
LF:4
LH:2
Expand Down Expand Up @@ -189,8 +189,8 @@ def is_it_x(x):
DA:5,0
LF:4
LH:1
BRDA:3,0,0,-
BRDA:5,0,1,-
BRDA:2,3,0,0
BRDA:2,5,0,0
BRF:2
BRH:0
end_of_record
Expand Down Expand Up @@ -232,8 +232,8 @@ def test_is_it_x(self):
DA:5,0
LF:4
LH:1
BRDA:3,0,0,-
BRDA:5,0,1,-
BRDA:2,3,0,0
BRDA:2,5,0,0
BRF:2
BRH:0
end_of_record
Expand Down Expand Up @@ -276,8 +276,8 @@ def test_half_covered_branch(self) -> None:
DA:6,0
LF:4
LH:3
BRDA:6,0,0,-
BRDA:4,0,1,1
BRDA:3,4,0,1
BRDA:3,6,0,0
BRF:2
BRH:1
end_of_record
Expand All @@ -288,8 +288,7 @@ def test_half_covered_branch(self) -> None:
def test_empty_init_files(self) -> None:
# Test that in the case of an empty __init__.py file, the lcov
# reporter will note that the file is there, and will note the empty
# line. It will also note the lack of branches, and the checksum for
# the line.
# line. It will also note the lack of branches.
#
# Although there are no lines found, it will note one line as hit in
# old Pythons, and no lines hit in newer Pythons.
Expand Down Expand Up @@ -319,6 +318,7 @@ def test_empty_init_files(self) -> None:
def test_empty_init_file_skipped(self) -> None:
# Test that the lcov reporter honors skip_empty, when this
# is possible (see test_empty_init_files for when it isn't).

self.make_file("__init__.py", "")
self.make_file(".coveragerc", "[report]\nskip_empty = True\n")
self.assert_doesnt_exist(".coverage")
Expand Down Expand Up @@ -361,12 +361,12 @@ def test_excluded_lines(self) -> None:
SF:runme.py
DA:1,1
DA:3,1
DA:6,1
DA:4,0
DA:6,1
LF:4
LH:3
BRDA:4,0,0,-
BRDA:6,0,1,1
BRDA:3,4,0,0
BRDA:3,6,0,1
BRF:2
BRH:1
end_of_record
Expand Down
2 changes: 1 addition & 1 deletion tests/test_report_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ def test_lcov(self) -> None:
expected = textwrap.dedent("""\
SF:good.j2
DA:1,1
DA:3,1
DA:2,0
DA:3,1
LF:3
LH:2
end_of_record
Expand Down

0 comments on commit 2815c3c

Please sign in to comment.