Skip to content

Commit

Permalink
refactor(dependency_getter): extract requirements files extraction (#886)
Browse files Browse the repository at this point in the history

* refactor(dependency_getter): extract requirements file extraction

* refactor(dependency_getter): extract requirements files extraction

* test(utils): remove obsolete `FUTURE_DEPRECATED_OBSOLETE_ARGUMENT`

* test(dependency_getter): add unsupported spec case
  • Loading branch information
mkniewallner authored Oct 15, 2024
1 parent 3fc6405 commit 458eff9
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 86 deletions.
178 changes: 96 additions & 82 deletions python/deptry/dependency_getter/requirements_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@
import re
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.parse import urlparse

from deptry.dependency import Dependency
from deptry.dependency_getter.base import DependenciesExtract, DependencyGetter

if TYPE_CHECKING:
from collections.abc import Mapping, Sequence


@dataclass
class RequirementsTxtDependencyGetter(DependencyGetter):
Expand All @@ -20,23 +24,13 @@ class RequirementsTxtDependencyGetter(DependencyGetter):
requirements_files_dev: tuple[str, ...] = ("dev-requirements.txt", "requirements-dev.txt")

def get(self) -> DependenciesExtract:
dependencies = list(
itertools.chain(
*(self._get_dependencies_from_requirements_files(file_name) for file_name in self.requirements_files)
)
)

dev_dependencies = list(
itertools.chain(
*(
self._get_dependencies_from_requirements_files(file_name)
for file_name in self._scan_for_dev_requirements_files()
)
)
return DependenciesExtract(
get_dependencies_from_requirements_files(self.requirements_files, self.package_module_name_map),
get_dependencies_from_requirements_files(
self._scan_for_dev_requirements_files(), self.package_module_name_map
),
)

return DependenciesExtract(dependencies, dev_dependencies)

def _scan_for_dev_requirements_files(self) -> list[str]:
"""
Check if any of the files passed as requirements_files_dev exist, and if so; return them.
Expand All @@ -46,83 +40,103 @@ def _scan_for_dev_requirements_files(self) -> list[str]:
logging.debug("Found files with development requirements! %s", dev_requirements_files)
return dev_requirements_files

def _get_dependencies_from_requirements_files(self, file_name: str, is_dev: bool = False) -> list[Dependency]:
logging.debug("Scanning %s for %s", file_name, "dev dependencies" if is_dev else "dependencies")
dependencies = []

file_path = Path(file_name)
def get_dependencies_from_requirements_files(
    file_names: Sequence[str], package_module_name_map: Mapping[str, Sequence[str]], is_dev: bool = False
) -> list[Dependency]:
    """
    Gather the dependencies declared across several requirements files.

    Every file in `file_names` is parsed in the order given, and the per-file results are
    concatenated into a single flat list. An empty `file_names` yields an empty list.
    """
    return [
        dependency
        for file_name in file_names
        for dependency in get_dependencies_from_requirements_file(file_name, package_module_name_map, is_dev)
    ]


def get_dependencies_from_requirements_file(
    file_name: str, package_module_name_map: Mapping[str, Sequence[str]], is_dev: bool = False
) -> list[Dependency]:
    """
    Parse a single requirements file and return the dependencies found in it.

    The file is read line by line; every line that yields a dependency contributes one entry,
    in file order. `is_dev` only affects the wording of the debug log message.
    """
    logging.debug("Scanning %s for %s", file_name, "dev dependencies" if is_dev else "dependencies")

    file_path = Path(file_name)
    with file_path.open() as requirements_file:
        lines = requirements_file.readlines()

    # Lines without a recognisable dependency (blank lines, comments, options, ...) produce a falsy result.
    candidates = (_extract_dependency_from_line(line, file_path, package_module_name_map) for line in lines)
    return [candidate for candidate in candidates if candidate]


def _extract_dependency_from_line(
    line: str, file_path: Path, package_module_name_map: Mapping[str, Sequence[str]]
) -> Dependency | None:
    """
    Build a `Dependency` from one line of a requirements file.

    Comments and newlines are stripped first. Returns `None` when no dependency name can be
    found on the line; otherwise the module names are looked up in `package_module_name_map`.
    """
    cleaned_line = _remove_newlines_from(_remove_comments_from(line))
    dependency_name = _find_dependency_name_in(cleaned_line)
    if not dependency_name:
        return None

    return Dependency(
        name=dependency_name,
        definition_file=file_path,
        module_names=package_module_name_map.get(dependency_name),
    )

with file_path.open() as f:
data = f.readlines()

for line in data:
dependency = self._extract_dependency_from_line(line, file_path)
if dependency:
dependencies.append(dependency)
def _find_dependency_name_in(line: str) -> str | None:
    """
    Return the dependency name declared on a requirements file line, if any.

    Lines recognised as URLs are delegated to `_extract_name_from_url`. For any other line, the
    name is the leading run of characters matched by the pattern below; `None` is returned when
    the line does not match (for instance when it starts with `-`).
    """
    if _line_is_url(line):
        return _extract_name_from_url(line)

    found = re.search("^[^-][a-zA-Z0-9-_]+", line)
    return found.group(0) if found else None

return dependencies

def _extract_dependency_from_line(self, line: str, file_path: Path) -> Dependency | None:
    """
    Build a `Dependency` from one line of a requirements file.

    Comments and newlines are stripped first. Returns `None` when no dependency name can be
    found on the line; otherwise the module names come from `self.package_module_name_map`.
    """
    stripped_line = self._remove_newlines_from(self._remove_comments_from(line))
    dependency_name = self._find_dependency_name_in(stripped_line)
    if not dependency_name:
        return None

    return Dependency(
        name=dependency_name,
        definition_file=file_path,
        module_names=self.package_module_name_map.get(dependency_name),
    )
def _remove_comments_from(line: str) -> str:
"""
Removes comments from a line. A comment is defined as any text
following a '#' that is either at the start of the line or preceded by a space.
This ensures that fragments like '#egg=' in URLs are not mistakenly removed.
"""
return re.sub(r"(?<!\S)#.*", "", line).strip()

def _find_dependency_name_in(self, line: str) -> str | None:
    """
    Return the dependency name declared on a requirements file line, if any.

    Lines recognised as URLs are delegated to `_extract_name_from_url`. For any other line, the
    name is the leading run of characters matched by the pattern below; `None` is returned when
    the line does not match.
    """
    if self._line_is_url(line):
        return self._extract_name_from_url(line)

    name_match = re.search("^[^-][a-zA-Z0-9-_]+", line)
    if name_match is None:
        return None
    return name_match.group(0)

@staticmethod
def _remove_comments_from(line: str) -> str:
"""
Removes comments from a line. A comment is defined as any text
following a '#' that is either at the start of the line or preceded by a space.
This ensures that fragments like '#egg=' in URLs are not mistakenly removed.
"""
return re.sub(r"(?<!\S)#.*", "", line).strip()
def _remove_newlines_from(line: str) -> str:
return line.replace("\n", "")

@staticmethod
def _remove_newlines_from(line: str) -> str:
return line.replace("\n", "")

@staticmethod
def _line_is_url(line: str) -> bool:
return urlparse(line).scheme != ""
def _line_is_url(line: str) -> bool:
return urlparse(line).scheme != ""

@staticmethod
def _extract_name_from_url(line: str) -> str | None:
# Try to find egg, for url like git+https://github.com/xxxxx/package@xxxxx#egg=package
match = re.search("egg=([a-zA-Z0-9-_]*)", line)
if match:
return match.group(1)

# for url like git+https://github.com/name/python-module.git@0d6dc38d58
match = re.search(r"\/((?:(?!\/).)*?)\.git", line)
if match:
return match.group(1)
def _extract_name_from_url(line: str) -> str | None:
# Try to find egg, for url like git+https://github.com/xxxxx/package@xxxxx#egg=package
match = re.search("egg=([a-zA-Z0-9-_]*)", line)
if match:
return match.group(1)

# for url like https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
match = re.search(r"\/((?:(?!\/).)*?)\/archive\/", line)
if match:
return match.group(1)
# for url like git+https://github.com/name/python-module.git@0d6dc38d58
match = re.search(r"\/((?:(?!\/).)*?)\.git", line)
if match:
return match.group(1)

logging.warning("Could not parse dependency name from url %s", line)
return None
# for url like https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
match = re.search(r"\/((?:(?!\/).)*?)\/archive\/", line)
if match:
return match.group(1)

logging.warning("Could not parse dependency name from url %s", line)
return None
1 change: 0 additions & 1 deletion tests/functional/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
class Project(str, Enum):
EXAMPLE = "example_project"
PEP_621 = "pep_621_project"
FUTURE_DEPRECATED_OBSOLETE_ARGUMENT = "project_with_future_deprecated_obsolete_argument"
GITIGNORE = "project_with_gitignore"
MULTIPLE_SOURCE_DIRECTORIES = "project_with_multiple_source_directories"
NAMESPACE = "project_using_namespace"
Expand Down
7 changes: 4 additions & 3 deletions tests/unit/dependency_getter/test_requirements_txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from deptry.dependency_getter.requirements_files import RequirementsTxtDependencyGetter
from deptry.dependency_getter.requirements_files import RequirementsTxtDependencyGetter, _line_is_url
from tests.utils import run_within_dir


Expand Down Expand Up @@ -63,7 +63,8 @@ def test_parse_requirements_files_urls(tmp_path: Path) -> None:
https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip
git+https://github.com/baz/foo-bar.git@asd#egg=foo-bar
git+https://github.com/baz/foo-bar.git@asd
git+https://github.com/abc123/bar-foo@xyz789#egg=bar-fooo"""
git+https://github.com/abc123/bar-foo@xyz789#egg=bar-fooo
https://unsupported-specification.com"""

with run_within_dir(tmp_path):
with Path("requirements.txt").open("w") as f:
Expand Down Expand Up @@ -200,4 +201,4 @@ def test_dev_multiple_with_arguments(tmp_path: Path) -> None:
],
)
def test__line_is_url(line: str, expected: bool) -> None:
assert RequirementsTxtDependencyGetter._line_is_url(line) is expected
assert _line_is_url(line) is expected

0 comments on commit 458eff9

Please sign in to comment.