Skip to content

Commit

Permalink
Better error handling for DEP5 parse errors
Browse files Browse the repository at this point in the history
Before, the Debian Copyright object was created lazily. When it is first
needed, it is created. The Projects that were passed to other processes
did not yet have Copyright objects (because they weren't yet needed, and
can't be pickled). However, this sometimes resulted in every process
separately trying to parse the DEP5 file, and failing.

Now, the DEP5 file is parsed immediately instead of being lazily
created. We circumvent the pickling problem by unsetting the attribute
and re-parsing the file in the process. This is a bit stupid, but it
shouldn't result in errors if the DEP5 file is not changed in the
meantime.

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
  • Loading branch information
carmenbianca committed Oct 17, 2023
1 parent 1a0ec4c commit 2d8337b
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 10 deletions.
18 changes: 11 additions & 7 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,19 @@ def _parse_dep5(path: StrPath) -> Copyright:
_LOGGER.debug(_("no '{}' file, or could not read it").format(path))
raise
except UnicodeError:
_LOGGER.exception(_("'{}' could not be parsed as utf-8").format(path))
_LOGGER.error(_("'{}' could not be parsed as utf-8").format(path))
raise
# FIXME: I don't really expect the ValueError as a parsing error here. Let's
# make an upstream fix.
except (DebianError, ValueError):
_LOGGER.exception(
# TODO: Remove ValueError once
# <https://salsa.debian.org/python-debian-team/python-debian/-/merge_requests/123>
# is closed
except (DebianError, ValueError) as error:
_LOGGER.error(
_(
"'{}' has syntax errors and could not be parsed as dep5 file"
).format(path)
"'{path}' has syntax errors and could not be parsed as dep5"
" file. We received the following error message:\n"
"\n"
"{message}"
).format(path=path, message=str(error))
)
raise

Expand Down
5 changes: 4 additions & 1 deletion src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,11 @@ def from_directory(
dep5_copyright: Optional[Copyright] = _parse_dep5(
root / ".reuse/dep5"
)
except (OSError, DebianError, UnicodeError, ValueError):
except OSError:
dep5_copyright = None
except (DebianError, UnicodeError, ValueError):
dep5_copyright = None
_LOGGER.warning(_("Proceeding as if no DEP5 file existed."))

meson_build_path = root / "meson.build"
uses_meson = meson_build_path.is_file()
Expand Down
29 changes: 27 additions & 2 deletions src/reuse/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2022 Pietro Albini <pietro.albini@ferrous-systems.com>
# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER <carmenbianca@fsfe.org>
# SPDX-FileCopyrightText: 2023 Coop IT Easy SC <https://coopiteasy.be>
#
# SPDX-License-Identifier: GPL-3.0-or-later

"""Module that contains reports about files and projects for linting."""

import contextlib
import datetime
import logging
import multiprocessing as mp
Expand All @@ -20,7 +22,7 @@
from uuid import uuid4

from . import __REUSE_version__, __version__
from ._util import _LICENSING, StrPath, _checksum
from ._util import _LICENSING, StrPath, _checksum, _parse_dep5
from .project import Project, ReuseInfo

_LOGGER = logging.getLogger(__name__)
Expand All @@ -34,11 +36,34 @@ class _MultiprocessingContainer:
def __init__(
self, project: Project, do_checksum: bool, add_license_concluded: bool
):
self.project = project
# TODO: We create a copy of the project in the following song-and-dance
# because the debian Copyright object cannot be pickled.
new_project = Project(
project.root,
vcs_strategy=project.vcs_strategy.__class__,
license_map=project.license_map,
licenses=project.licenses.copy(),
# Unset dep5_copyright
dep5_copyright=None,
include_submodules=project.include_submodules,
include_meson_subprojects=project.include_meson_subprojects,
)
new_project.licenses_without_extension = (
project.licenses_without_extension
)

self.project = new_project
# Remember that a dep5_copyright was (or was not) set prior.
self.has_dep5 = bool(project.dep5_copyright)
self.do_checksum = do_checksum
self.add_license_concluded = add_license_concluded

def __call__(self, file_: StrPath) -> "_MultiprocessingResult":
if self.has_dep5:
with contextlib.suppress(Exception):
self.project.dep5_copyright = _parse_dep5(
self.project.root / ".reuse/dep5"
)
# pylint: disable=broad-except
try:
return _MultiprocessingResult(
Expand Down
24 changes: 24 additions & 0 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,4 +398,28 @@ def test_relative_from_root_no_shared_base_path(empty_directory):
) == Path("src/hello.py")


def test_duplicate_field_dep5(empty_directory, caplog):
"""When a duplicate field is in a dep5 file, correctly handle errors."""
dep5_text = cleandoc(
"""
Format: https://example.com/format/1.0
Upstream-Name: Some project
Upstream-Contact: Jane Doe
Source: https://example.com/
Files: foo.py
Copyright: 2017 Jane Doe
Copyright: 2017 John Doe
License: GPL-3.0-or-later
"""
)
(empty_directory / ".reuse").mkdir()
(empty_directory / ".reuse/dep5").write_text(dep5_text)

project = Project.from_directory(empty_directory)
assert project.dep5_copyright is None
assert "syntax errors" in caplog.text
assert 'Duplicate field "Copyright"' in caplog.text


# REUSE-IgnoreEnd

0 comments on commit 2d8337b

Please sign in to comment.