Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make lcov parser more accepting of corrupt coverage lines #726

Merged
merged 1 commit into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 71 additions & 68 deletions services/report/languages/lcov.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from collections import defaultdict
from decimal import Decimal
from decimal import Decimal, InvalidOperation
from io import BytesIO

import sentry_sdk
Expand Down Expand Up @@ -34,13 +34,13 @@
def _process_file(
doc: bytes, report_builder_session: ReportBuilderSession
) -> ReportFile | None:
_already_informed_of_negative_execution_count = False
branches: dict[str, dict[str, int]] = defaultdict(dict)
fln, fh = {}, {}
fn_lines: set[str] = set() # lines of function definitions

JS = False
CPP = False
skip_lines: list[str] = []
_file = None
_file: ReportFile | None = None

for encoded_line in BytesIO(doc):
line = encoded_line.decode(errors="replace").rstrip("\n")
Expand All @@ -49,14 +49,15 @@

method, content = line.split(":", 1)
content = content.strip()
if method in ("TN", "LF", "LH", "BRF", "BRH"):
if method in ("TN", "LF", "LH", "FNF", "FNH", "BRF", "BRH", "FNDA"):
# TN: test title
# LF: lines found
# LH: lines hit
# FNF: functions found
# FNH: functions hit
# BRF: branches found
# BRH: branches hit
# FNDA: function data
continue

if method == "SF":
Expand Down Expand Up @@ -86,35 +87,27 @@
if line.startswith("undefined,"):
continue

splitted_content = content.split(",")
line = splitted_content[0]
hit = splitted_content[1]
if line[0] in ("0", "n") or hit[0] in ("=", "s"):
split = content.split(",", 2)
if len(split) < 2:
continue
line_str = split[0]
hit = split[1]

if hit == "undefined" or line == "undefined":
if line_str in ("", "undefined") or hit in ("", "undefined"):
continue
if line_str[0] in ("0", "n") or hit[0] in ("=", "s"):
continue

if hit.isnumeric():
cov = int(hit)
else:
# Huge ints may be expressed in scientific notation.
# int(float(hit)) may lose precision, but Decimal shouldn't.
cov = int(Decimal(hit))

if cov < -1:
# https://github.com/linux-test-project/lcov/commit/dfec606f3b30e1ac0f4114cfb98b29f91e9edb21
if not _already_informed_of_negative_execution_count:
log.warning(
"At least one occurrence of negative execution counts on Lcov",
extra=dict(
execution_count=cov, lcov_report_filename=_file.name
),
)
_already_informed_of_negative_execution_count = True
cov = 0
coverage_line = report_builder_session.create_coverage_line(cov)
_file.append(int(line), coverage_line)
try:
ln = int(line_str)
cov = parse_int(hit)
except (ValueError, InvalidOperation):
continue

cov = max(cov, 0) # clamp to 0

_line = report_builder_session.create_coverage_line(cov)
_file.append(ln, _line)

elif method == "FN" and not JS:
"""
Expand All @@ -123,69 +116,79 @@

FN:<line number of function start>,<function name>
"""
line, name = content.split(",", 1)
if CPP and name[:2] in ("_Z", "_G"):
skip_lines.append(line)
continue

fln[name] = line
split = content.split(",", 1)
if len(split) < 2:
continue
line_str, name = split

elif method == "FNDA" and not JS:
# FNDA:<execution count>,<function name>
hit, name = content.split(",", 1)
if CPP and name[0] == "_":
skip_lines.append(line)
if CPP and name[:2] in ("_Z", "_G"):
skip_lines.append(line_str)
continue

if hit:
if hit.isnumeric():
fh[name] = int(hit)
else:
fh[name] = int(Decimal(hit))
fn_lines.add(line_str)

elif method == "BRDA" and not JS:
"""
Branch coverage information is stored which one line per branch:
Branch coverage information is stored with one line per branch:

BRDA:<line number>,<block number>,<branch number>,<taken>

Block number and branch number are gcc internal IDs for the branch.
Taken is either "-" if the basic block containing the branch was never
Block number and branch number are gcc internal IDs for the branch.
Taken is either "-" if the basic block containing the branch was never
executed or a number indicating how often that branch was taken.
"""
# BRDA:<line number>,<block number>,<branch number>,<taken>
ln, block, branch, taken = content.split(",", 3)
if ln == "1" and _file.name.endswith(".ts"):
split = content.split(",", 3)
if len(split) < 4:
continue

Check warning on line 144 in services/report/languages/lcov.py

View check run for this annotation

Codecov Notifications / codecov/patch

services/report/languages/lcov.py#L144

Added line #L144 was not covered by tests
line_str, block, branch, taken = split

if line_str == "1" and _file.name.endswith(".ts"):
continue

elif ln not in ("0", ""):
branches[ln]["%s:%s" % (block, branch)] = (
elif line_str not in ("0", ""):
branches[line_str]["%s:%s" % (block, branch)] = (
0 if taken in ("-", "0") else 1
)

if _file is None:
return None

# remove skipped
# remove skipped branches
for sl in skip_lines:
branches.pop(sl, None)

methods = fln.values()

# work branches
for ln, br in branches.items():
s, li = sum(br.values()), len(br.values())
mb = [bid for bid, cov in br.items() if cov == 0]
coverage = f"{s}/{li}"
coverage_type = CoverageType.method if ln in methods else CoverageType.branch

_file.append(
int(ln),
report_builder_session.create_coverage_line(
coverage,
coverage_type,
missing_branches=(mb if mb != [] else None),
),
for line_str, br in branches.items():
try:
ln = int(line_str)
except ValueError:
continue

Check warning on line 167 in services/report/languages/lcov.py

View check run for this annotation

Codecov Notifications / codecov/patch

services/report/languages/lcov.py#L166-L167

Added lines #L166 - L167 were not covered by tests

branch_num = len(br.values())
branch_sum = sum(br.values())
missing_branches = [bid for bid, cov in br.items() if cov == 0]

coverage = f"{branch_sum}/{branch_num}"
coverage_type = (
CoverageType.method if line_str in fn_lines else CoverageType.branch
)

_line = report_builder_session.create_coverage_line(
coverage,
coverage_type,
missing_branches=(missing_branches if missing_branches != [] else None),
)
_file.append(ln, _line)

return _file


def parse_int(n: str) -> int:
    """Parse a textual count into an ``int``.

    Plain digit strings take the fast ``int()`` path. Everything else
    (negative numbers, scientific notation such as ``1.5e+20``) is parsed
    through ``Decimal`` so that very large values keep full precision.
    Fractional values are truncated toward zero by the ``int()`` conversion.

    Args:
        n: The raw count text.

    Returns:
        The parsed integer value.

    Raises:
        ValueError: If ``n`` is not a finite number (``NaN``/``Infinity``),
            or is a "numeric" string that ``int()`` cannot convert.
        decimal.InvalidOperation: If ``n`` is not a valid decimal literal.
    """
    if n.isnumeric():
        return int(n)

    # Huge ints may be expressed in scientific notation.
    # int(float(n)) may lose precision, but Decimal shouldn't.
    value = Decimal(n)
    if not value.is_finite():
        # int(Decimal("Infinity")) raises OverflowError, which callers that
        # only guard against ValueError/InvalidOperation would not catch.
        # Report every non-finite input uniformly as a ValueError instead.
        raise ValueError(f"cannot convert {n!r} to an integer")
    return int(value)
67 changes: 47 additions & 20 deletions services/report/languages/tests/unit/test_lcov.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

from . import create_report_builder_session

txt = b"""TN:
txt = b"""
TN:
SF:file.js
FNDA:76,jsx
FN:76,(anonymous_1)
Expand Down Expand Up @@ -86,6 +87,36 @@
end_of_record
"""

negative_count = b"""
TN:
SF:file.js
DA:1,1
DA:2,2
DA:3,0
DA:4,-1
DA:5,-5
DA:6,-20
end_of_record
"""

corrupt_txt = b"""
TN:
SF:foo.cpp

DA:1,1

DA:DA:130,0
DA:0,
DA:,0
DA:
DA:not_int,123
DA:123,not_decimal

FN:just_a_name_no_line

end_of_record
"""


class TestLcov(BaseTestCase):
def test_report(self):
Expand All @@ -100,7 +131,7 @@ def fixes(path):
report = report_builder_session.output_report()
processed_report = self.convert_report_to_better_readable(report)

expected_result_archive = {
assert processed_report["archive"] == {
"file.cpp": [
(1, 1, None, [[0, 1, None, None, None]], None, None),
(2, "1/3", "m", [[0, "1/3", ["1:1", "1:3"], None, None]], None, None),
Expand All @@ -121,7 +152,6 @@ def fixes(path):
None,
None,
),
# TODO (Thiago): This is out of order compared to the original, verify what happened
],
"file.js": [
(1, 1, None, [[0, 1, None, None, None]], None, None),
Expand All @@ -130,8 +160,6 @@ def fixes(path):
"file.ts": [(2, 1, None, [[0, 1, None, None, None]], None, None)],
}

assert expected_result_archive == processed_report["archive"]

def test_detect(self):
processor = lcov.LcovProcessor()
assert processor.matches_content(b"hello\nend_of_record\n", "", "") is True
Expand All @@ -140,21 +168,8 @@ def test_detect(self):
assert processor.matches_content(b"", "", "") is False

def test_negative_execution_count(self):
text = "\n".join(
[
"TN:",
"SF:file.js",
"DA:1,1",
"DA:2,2",
"DA:3,0",
"DA:4,-1",
"DA:5,-5",
"DA:6,-20",
"end_of_record",
]
).encode()
report_builder_session = create_report_builder_session()
lcov.from_txt(text, report_builder_session)
lcov.from_txt(negative_count, report_builder_session)
report = report_builder_session.output_report()
processed_report = self.convert_report_to_better_readable(report)

Expand All @@ -163,8 +178,20 @@ def test_negative_execution_count(self):
(1, 1, None, [[0, 1, None, None, None]], None, None),
(2, 2, None, [[0, 2, None, None, None]], None, None),
(3, 0, None, [[0, 0, None, None, None]], None, None),
(4, -1, None, [[0, -1, None, None, None]], None, None),
(4, 0, None, [[0, 0, None, None, None]], None, None),
(5, 0, None, [[0, 0, None, None, None]], None, None),
(6, 0, None, [[0, 0, None, None, None]], None, None),
]
}

def test_skips_corrupted_lines(self):
report_builder_session = create_report_builder_session()
lcov.from_txt(corrupt_txt, report_builder_session)
report = report_builder_session.output_report()
processed_report = self.convert_report_to_better_readable(report)

assert processed_report["archive"] == {
"foo.cpp": [
(1, 1, None, [[0, 1, None, None, None]], None, None),
]
}
Loading