Skip to content

Commit

Permalink
Refactor extract_declared_dependencies parser APIs in preparation for
Browse files Browse the repository at this point in the history
#227 (#229)

* test_extract_declared_dependencies_succes: Harmless test refactoring

Move the dependency_factory() call into the test case instead of in the
parametrized test data. Also remove unnecessary "__" prefix from test
ids.

* extract_declared_dependencies: Refactor API of parser functions

Take a single Path argument instead of separate contents + source. This
allows the parser function itself to choose how to open the file and
pass it to the underlying parser. This is also necessary for the
upcoming rewrite of our requirements.txt parser (#227).

* test_extract_declared_dependencies_pyproject_toml: Remove unnecessary dedent()

* extract_declared_dependencies: Rename parser functions

The rename reflects that these now parse file paths, and not extracted
file contents.

---------

Co-authored-by: Vince Reuter <vince.reuter@gmail.com>
  • Loading branch information
jherland and vreuter authored Mar 15, 2023
1 parent 58308bb commit b3a86d6
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 365 deletions.
53 changes: 28 additions & 25 deletions fawltydeps/extract_declared_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import logging
import re
import sys
from dataclasses import replace
from functools import partial
from itertools import takewhile
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Callable, Iterable, Iterator, NamedTuple, Optional, Set, Tuple

from pkg_resources import Requirement
Expand Down Expand Up @@ -56,17 +58,16 @@ def parse_one_req(req_text: str, source: Location) -> DeclaredDependency:
return DeclaredDependency(req_name, source)


def parse_requirements_contents(
text: str, source: Location
) -> Iterator[DeclaredDependency]:
def parse_requirements_txt(path: Path) -> Iterator[DeclaredDependency]:
"""Extract dependencies (packages names) from a requirements file.
This is usually a requirements.txt file or any other file following the
Requirements File Format as documented here:
https://pip.pypa.io/en/stable/reference/requirements-file-format/.
"""
source = Location(path)
parse_one = partial(parse_one_req, source=source)
for line in text.splitlines():
for line in path.read_text().splitlines():
cleaned = line.lstrip()
if (
not cleaned # skip empty lines
Expand All @@ -91,7 +92,7 @@ def parse_requirements_contents(
logger.warning(f"Could not parse {source} line {line!r}: {exc}")


def parse_setup_contents(text: str, source: Location) -> Iterator[DeclaredDependency]:
def parse_setup_py(path: Path) -> Iterator[DeclaredDependency]:
"""Extract dependencies (package names) from setup.py.
This file can contain arbitrary Python code, and simply executing it has
Expand All @@ -100,7 +101,7 @@ def parse_setup_contents(text: str, source: Location) -> Iterator[DeclaredDepend
the `install_requires` and `extras_require` keyword args from that function
call.
"""

source = Location(path)
# Attempt to keep track of simple variable assignments (name -> value)
# declared in the setup.py prior to the setup() call, so that we can
# resolve any variable references in the arguments to the setup() call.
Expand Down Expand Up @@ -139,7 +140,7 @@ def _is_setup_function_call(node: ast.AST) -> bool:
and node.value.func.id == "setup"
)

setup_contents = ast.parse(text, filename=str(source.path))
setup_contents = ast.parse(path.read_text(), filename=str(source.path))
for node in ast.walk(setup_contents):
tracked_vars.evaluate(node)
if _is_setup_function_call(node):
Expand All @@ -149,9 +150,7 @@ def _is_setup_function_call(node: ast.AST) -> bool:
break


def parse_setup_cfg_contents(
text: str, source: Location
) -> Iterator[DeclaredDependency]:
def parse_setup_cfg(path: Path) -> Iterator[DeclaredDependency]:
"""Extract dependencies (package names) from setup.cfg.
`ConfigParser` basic building blocks are "sections"
Expand All @@ -163,16 +162,22 @@ def parse_setup_cfg_contents(
The declaration uses `section` + `option` syntax where section may be [options]
or [options.{requirements_type}].
"""
source = Location(path)
parser = configparser.ConfigParser()
try:
parser.read_string(text)
parser.read([path])
except configparser.Error as exc:
logger.debug(exc)
logger.error("Could not parse contents of `%s`", source)
return

def parse_value(value: str) -> Iterator[DeclaredDependency]:
yield from parse_requirements_contents(value, source=source)
# Ugly hack since parse_requirements_txt() accepts only a path:
with NamedTemporaryFile(mode="wt") as tmp:
tmp.write(value)
tmp.flush()
for dep in parse_requirements_txt(Path(tmp.name)):
yield replace(dep, source=source)

def extract_section(section: str) -> Iterator[DeclaredDependency]:
if section in parser:
Expand Down Expand Up @@ -288,17 +293,17 @@ def parse_pyproject_elements(
)


def parse_pyproject_contents(
text: str, source: Location
) -> Iterator[DeclaredDependency]:
def parse_pyproject_toml(path: Path) -> Iterator[DeclaredDependency]:
"""Extract dependencies (package names) from pyproject.toml.
There are multiple ways to declare dependencies inside a pyproject.toml.
We currently handle:
- PEP 621 core metadata fields
- Poetry-specific metadata in `tool.poetry` sections.
"""
parsed_contents = tomllib.loads(text)
source = Location(path)
with path.open("rb") as tomlfile:
parsed_contents = tomllib.load(tomlfile)

yield from parse_pep621_pyproject_contents(parsed_contents, source)

Expand All @@ -314,7 +319,7 @@ class ParsingStrategy(NamedTuple):
"""Named pairing of an applicability criterion and a dependency parser"""

applies_to_path: Callable[[Path], bool]
execute: Callable[[str, Location], Iterator[DeclaredDependency]]
execute: Callable[[Path], Iterator[DeclaredDependency]]


def first_applicable_parser(
Expand All @@ -333,18 +338,18 @@ def first_applicable_parser(

PARSER_CHOICES = {
ParserChoice.PYPROJECT_TOML: ParsingStrategy(
lambda path: path.name == "pyproject.toml", parse_pyproject_contents
lambda path: path.name == "pyproject.toml", parse_pyproject_toml
),
ParserChoice.REQUIREMENTS_TXT: ParsingStrategy(
lambda path: re.compile(r".*\brequirements\b.*\.(txt|in)").match(path.name)
is not None,
parse_requirements_contents,
parse_requirements_txt,
),
ParserChoice.SETUP_CFG: ParsingStrategy(
lambda path: path.name == "setup.cfg", parse_setup_cfg_contents
lambda path: path.name == "setup.cfg", parse_setup_cfg
),
ParserChoice.SETUP_PY: ParsingStrategy(
lambda path: path.name == "setup.py", parse_setup_contents
lambda path: path.name == "setup.py", parse_setup_py
),
}

Expand Down Expand Up @@ -375,7 +380,7 @@ def extract_declared_dependencies_from_path(
ctx="Parsing given dependencies path isn't supported", path=path
)
parser = choice_and_parser[1]
yield from parser.execute(path.read_text(), Location(path))
yield from parser.execute(path)
elif path.is_dir():
logger.debug("Extracting dependencies from files under %s", path)
for file in walk_dir(path):
Expand All @@ -384,9 +389,7 @@ def extract_declared_dependencies_from_path(
continue
if parser_choice is None or choice_and_parser[0] == parser_choice:
logger.debug(f"Extracting dependencies: {file}")
yield from choice_and_parser[1].execute(
file.read_text(), Location(file)
)
yield from choice_and_parser[1].execute(file)
else:
raise UnparseablePathException(
ctx="Dependencies declaration path is neither dir nor file", path=path
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def lint(session):
session.run("pylint", "fawltydeps")
session.run(
"pylint",
"--disable=missing-function-docstring,invalid-name,redefined-outer-name,too-many-lines",
"--disable=missing-function-docstring,invalid-name,redefined-outer-name,too-many-lines,too-many-arguments",
"tests",
)

Expand Down
39 changes: 21 additions & 18 deletions tests/test_extract_declared_dependencies_errors.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
"""Test unhappy path, where parsing of dependencies fails"""

import logging
from pathlib import Path
from textwrap import dedent

import pytest

from fawltydeps.extract_declared_dependencies import (
extract_declared_dependencies,
parse_setup_cfg_contents,
parse_setup_contents,
parse_setup_cfg,
parse_setup_py,
)
from fawltydeps.types import Location, UnparseablePathException

Expand All @@ -25,20 +23,22 @@ def test_extract_declared_dependencies__unsupported_file__raises_error(
)


def test_parse_setup_cfg_contents__malformed__logs_error(caplog):
setup_contents = dedent(
"""\
[options
install_requires =
pandas
"""
def test_parse_setup_cfg__malformed__logs_error(write_tmp_files, caplog):
tmp_path = write_tmp_files(
{
"setup.cfg": """\
[options
install_requires =
pandas
""",
}
)
expected = []
caplog.set_level(logging.ERROR)

source = Location(Path("setup.cfg"))
result = list(parse_setup_cfg_contents(setup_contents, source))
assert f"Could not parse contents of `{source}`" in caplog.text
path = tmp_path / "setup.cfg"
result = list(parse_setup_cfg(path))
assert f"Could not parse contents of `{Location(path)}`" in caplog.text
assert expected == result


Expand Down Expand Up @@ -124,11 +124,14 @@ def test_parse_setup_cfg_contents__malformed__logs_error(caplog):
),
],
)
def test_parse_setup_contents__cannot_parse__logs_warning(
caplog, code, expect, fail_arg
def test_parse_setup_py__cannot_parse__logs_warning(
write_tmp_files, caplog, code, expect, fail_arg
):
tmp_path = write_tmp_files({"setup.py": code})
path = tmp_path / "setup.py"

caplog.set_level(logging.WARNING)
result = list(parse_setup_contents(dedent(code), Location(Path("setup.py"))))
result = list(parse_setup_py(path))
assert f"Could not parse contents of `{fail_arg}`" in caplog.text
assert "setup.py" in caplog.text
assert str(path) in caplog.text
assert expect == result
Loading

0 comments on commit b3a86d6

Please sign in to comment.