[lcov] Re-implement lcov reports using the same algorithm as XML repo…

…rts. This fixes five serious bugs: - The first field of a BRDA: line may not be zero (#1846) - The first field of a BRDA: line is supposed to be the *source* line of a branch, not the destination line. - The fourth field of a BRDA: line is supposed to be “-” when the branch was *never reached*, not when it was reached but never taken (which is what a branch’s presence in missing_arcs means). As far as I can tell, coverage.py currently doesn’t know of the existence of branches that were never reached. - The decision of whether to emit a DA: line at all is now taken according to what’s in analysis.statements, not what’s in analysis.executed and analysis.missing; this is important because some lines (e.g. the beginnings of docstrings) may appear in analysis.executed but *not* in analysis.statements. - We no longer attempt to call branch-coverage-related Analysis methods when analysis.has_arcs is false. And two minor annoyances: - DA: and BRDA: lines are now emitted strictly in ascending order by (source) line number. - Source file records are now sorted by *relative* pathname, not absolute pathname from the coverage database.
nedbat · Sep 6, 2024 · 2815c3c · 2815c3c
1 parent 7bed122
commit 2815c3c
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 79 deletions.
diff --git a/coverage/lcovreport.py b/coverage/lcovreport.py
@@ -70,13 +70,20 @@ def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float:
         self.coverage.get_data()
         outfile = outfile or sys.stdout
 
-        for fr, analysis in get_analysis_to_report(self.coverage, morfs):
+        # ensure file records are sorted by the _relative_ filename, not the full path
+        to_report = [(fr.relative_filename(), fr, analysis)
+                     for fr, analysis in get_analysis_to_report(self.coverage, morfs)]
+        to_report.sort()
+
+        for fname, fr, analysis in to_report:
             self.total += analysis.numbers
-            self.lcov_file(fr, analysis, outfile)
+            self.lcov_file(fname, fr, analysis, outfile)
 
         return self.total.n_statements and self.total.pc_covered
 
-    def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> None:
+    def lcov_file(self, rel_fname: str,
+                  fr: FileReporter, analysis: Analysis,
+                  outfile: IO[str]) -> None:
         """Produces the lcov data for a single file.
 
         This currently supports both line and branch coverage,
@@ -86,76 +93,63 @@ def lcov_file(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> N
             if self.config.skip_empty:
                 return
 
-        outfile.write(f"SF:{fr.relative_filename()}\n")
-        if self.checksum_mode == "file":
+        outfile.write(f"SF:{rel_fname}\n")
+
+        source_lines = None
+        if self.checksum_mode == "line":
+            source_lines = fr.source().splitlines()
+        elif self.checksum_mode == "file":
             outfile.write(f"VER:{file_hash(fr.source())}\n")
 
-        source_lines = fr.source().splitlines()
-        for covered in sorted(analysis.executed):
-            if covered in analysis.excluded:
-                # Do not report excluded as executed
-                continue
-
-            if source_lines:
-                if covered-1 >= len(source_lines):
-                    break
-                line = source_lines[covered-1]
-            else:
-                line = ""
-            if self.checksum_mode == "line":
-                hash_suffix = "," + line_hash(line)
-            else:
-                hash_suffix = ""
-
-            # Note: Coverage.py currently only supports checking *if* a line
-            # has been executed, not how many times, so we set this to 1 for
-            # nice output even if it's technically incorrect.
-            outfile.write(f"DA:{covered},1{hash_suffix}\n")
-
-        for missed in sorted(analysis.missing):
-            # We don't have to skip excluded lines here, because `missing`
-            # already doesn't have them.
-            assert source_lines
-            line = source_lines[missed-1]
+        # Emit a DA: record for each line of the file.
+        lines = sorted(analysis.statements)
+        hash_suffix = ""
+        for line in lines:
             if self.checksum_mode == "line":
-                hash_suffix = "," + line_hash(line)
-            else:
-                hash_suffix = ""
-            outfile.write(f"DA:{missed},0{hash_suffix}\n")
+                hash_suffix = "," + line_hash(source_lines[line-1])
+            # Q: can we get info about the number of times a statement is
+            # executed?  If so, that should be recorded here.
+            hit = int(line not in analysis.missing)
+            outfile.write(f"DA:{line},{hit}{hash_suffix}\n")
 
         if analysis.numbers.n_statements > 0:
             outfile.write(f"LF:{analysis.numbers.n_statements}\n")
             outfile.write(f"LH:{analysis.numbers.n_executed}\n")
 
-        # More information dense branch coverage data.
-        missing_arcs = analysis.missing_branch_arcs()
-        executed_arcs = analysis.executed_branch_arcs()
-        for block_number, block_line_number in enumerate(
-            sorted(analysis.branch_stats().keys()),
-        ):
-            for branch_number, line_number in enumerate(
-                sorted(missing_arcs[block_line_number]),
-            ):
-                # The exit branches have a negative line number,
-                # this will not produce valid lcov. Setting
-                # the line number of the exit branch to 0 will allow
-                # for valid lcov, while preserving the data.
-                line_number = max(line_number, 0)
-                outfile.write(f"BRDA:{line_number},{block_number},{branch_number},-\n")
-
-            # The start value below allows for the block number to be
-            # preserved between these two for loops (stopping the loop from
-            # resetting the value of the block number to 0).
-            for branch_number, line_number in enumerate(
-                sorted(executed_arcs[block_line_number]),
-                start=len(missing_arcs[block_line_number]),
-            ):
-                line_number = max(line_number, 0)
-                outfile.write(f"BRDA:{line_number},{block_number},{branch_number},1\n")
-
-        # Summary of the branch coverage.
+        # More information dense branch coverage data, if available.
         if analysis.has_arcs:
             branch_stats = analysis.branch_stats()
+            executed_arcs = analysis.executed_branch_arcs()
+            missing_arcs = analysis.missing_branch_arcs()
+
+            for line in lines:
+                if line in branch_stats:
+                    # In our data, exit branches have negative destination line numbers.
+                    # The lcov tools will reject these - but the lcov tools consider the
+                    # destinations of branches to be opaque tokens.  Use the absolute
+                    # value of the destination line number as the destination block
+                    # number, and its sign as the destination branch number.  This will
+                    # ensure destinations are unique and stable, source line numbers are
+                    # always positive, and destination block and branch numbers are always
+                    # nonnegative, which are the properties we need.
+
+                    # The data we have does not permit us to identify branches that were
+                    # never *reached*, which is what "-" in the hit column means.  Such
+                    # branches aren't in either executed_arcs or missing_arcs - we don't
+                    # even know they exist.
+
+                    # Q: can we get counts of the number of times each arc was executed?
+                    # branch_stats has "total" and "taken" counts but it doesn't have
+                    # "taken" broken down by destination.
+                    arcs = []
+                    arcs.extend((abs(l), int(l <= 0), 1) for l in executed_arcs[line])
+                    arcs.extend((abs(l), int(l <= 0), 0) for l in missing_arcs[line])
+                    arcs.sort()
+
+                    for block, branch, hit in arcs:
+                        outfile.write(f"BRDA:{line},{block},{branch},{hit}\n")
+
+            # Summary of the branch coverage.
             brf = sum(t for t, k in branch_stats.values())
             brh = brf - sum(t - k for t, k in branch_stats.values())
             if brf > 0:

diff --git a/tests/test_lcov.py b/tests/test_lcov.py
@@ -60,8 +60,8 @@ def IsItTrue():
         expected_result = textwrap.dedent("""\
             SF:main_file.py
             DA:1,1
-            DA:4,1
             DA:2,0
+            DA:4,1
             DA:5,0
             LF:4
             LH:2
@@ -92,8 +92,8 @@ def IsItTrue():
         expected_result = textwrap.dedent("""\
             SF:main_file.py
             DA:1,1,7URou3io0zReBkk69lEb/Q
-            DA:4,1,ilhb4KUfytxtEuClijZPlQ
             DA:2,0,Xqj6H1iz/nsARMCAbE90ng
+            DA:4,1,ilhb4KUfytxtEuClijZPlQ
             DA:5,0,LWILTcvARcydjFFyo9qM0A
             LF:4
             LH:2
@@ -120,8 +120,8 @@ def IsItTrue():
             SF:main_file.py
             VER:FjGZ0lkufNMCxmG+BA8yvoaqg9xdUOVKi5kpRpUs3c0
             DA:1,1
-            DA:4,1
             DA:2,0
+            DA:4,1
             DA:5,0
             LF:4
             LH:2
@@ -144,8 +144,8 @@ def test_simple_line_coverage_two_files(self) -> None:
         expected_result = textwrap.dedent("""\
             SF:main_file.py
             DA:1,1
-            DA:4,1
             DA:2,0
+            DA:4,1
             DA:5,0
             LF:4
             LH:2
@@ -189,8 +189,8 @@ def is_it_x(x):
             DA:5,0
             LF:4
             LH:1
-            BRDA:3,0,0,-
-            BRDA:5,0,1,-
+            BRDA:2,3,0,0
+            BRDA:2,5,0,0
             BRF:2
             BRH:0
             end_of_record
@@ -232,8 +232,8 @@ def test_is_it_x(self):
             DA:5,0
             LF:4
             LH:1
-            BRDA:3,0,0,-
-            BRDA:5,0,1,-
+            BRDA:2,3,0,0
+            BRDA:2,5,0,0
             BRF:2
             BRH:0
             end_of_record
@@ -276,8 +276,8 @@ def test_half_covered_branch(self) -> None:
             DA:6,0
             LF:4
             LH:3
-            BRDA:6,0,0,-
-            BRDA:4,0,1,1
+            BRDA:3,4,0,1
+            BRDA:3,6,0,0
             BRF:2
             BRH:1
             end_of_record
@@ -288,8 +288,7 @@ def test_half_covered_branch(self) -> None:
     def test_empty_init_files(self) -> None:
         # Test that in the case of an empty __init__.py file, the lcov
         # reporter will note that the file is there, and will note the empty
-        # line. It will also note the lack of branches, and the checksum for
-        # the line.
+        # line. It will also note the lack of branches.
         #
         # Although there are no lines found, it will note one line as hit in
         # old Pythons, and no lines hit in newer Pythons.
@@ -319,6 +318,7 @@ def test_empty_init_files(self) -> None:
     def test_empty_init_file_skipped(self) -> None:
         # Test that the lcov reporter honors skip_empty, when this
         # is possible (see test_empty_init_files for when it isn't).
+
         self.make_file("__init__.py", "")
         self.make_file(".coveragerc", "[report]\nskip_empty = True\n")
         self.assert_doesnt_exist(".coverage")
@@ -361,12 +361,12 @@ def test_excluded_lines(self) -> None:
             SF:runme.py
             DA:1,1
             DA:3,1
-            DA:6,1
             DA:4,0
+            DA:6,1
             LF:4
             LH:3
-            BRDA:4,0,0,-
-            BRDA:6,0,1,1
+            BRDA:3,4,0,0
+            BRDA:3,6,0,1
             BRF:2
             BRH:1
             end_of_record

diff --git a/tests/test_report_common.py b/tests/test_report_common.py
@@ -270,8 +270,8 @@ def test_lcov(self) -> None:
         expected = textwrap.dedent("""\
             SF:good.j2
             DA:1,1
-            DA:3,1
             DA:2,0
+            DA:3,1
             LF:3
             LH:2
             end_of_record