Skip to content

Commit

Permalink
tailor should skip bad requirements targets (#15755)
Browse files Browse the repository at this point in the history
Adds basic validation of `requirements.txt`, `Pipfile.lock`, and `pyproject.toml` files to the `tailor` goal, so that we don't permanently generate targets that try to ingest invalid files.

Closes #15734
  • Loading branch information
Christopher Neugebauer authored Jun 7, 2022
1 parent c8cd9ef commit 5e208ec
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 24 deletions.
54 changes: 46 additions & 8 deletions src/python/pants/backend/python/goals/tailor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@

from __future__ import annotations

import logging
import os
import re
from dataclasses import dataclass
from pathlib import PurePath
from typing import Iterable

from pants.backend.python.dependency_inference.module_mapper import module_from_stripped_path
from pants.backend.python.macros.pipenv_requirements import parse_pipenv_requirements
from pants.backend.python.macros.poetry_requirements import PyProjectToml, parse_pyproject_toml
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.target_types import (
PexBinary,
Expand All @@ -21,6 +24,7 @@
PythonTestUtilsGeneratorTarget,
ResolvedPexEntryPoint,
ResolvePexEntryPointRequest,
parse_requirements_file,
)
from pants.base.specs import AncestorGlobSpec, RawSpecs
from pants.core.goals.tailor import (
Expand All @@ -30,7 +34,7 @@
PutativeTargetsRequest,
group_by_dir,
)
from pants.engine.fs import DigestContents, PathGlobs, Paths
from pants.engine.fs import DigestContents, FileContent, PathGlobs, Paths
from pants.engine.internals.selectors import Get, MultiGet
from pants.engine.rules import collect_rules, rule
from pants.engine.target import Target, UnexpandedTargets
Expand All @@ -39,6 +43,8 @@
from pants.source.source_root import SourceRootsRequest, SourceRootsResult
from pants.util.logging import LogLevel

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class PutativePythonTargetsRequest(PutativeTargetsRequest):
Expand Down Expand Up @@ -126,15 +132,28 @@ async def find_putative_targets(
all_pipenv_lockfile_files,
all_pyproject_toml_contents,
) = await MultiGet(
Get(Paths, PathGlobs, req.path_globs("*requirements*.txt")),
Get(Paths, PathGlobs, req.path_globs("Pipfile.lock")),
Get(DigestContents, PathGlobs, req.path_globs("*requirements*.txt")),
Get(DigestContents, PathGlobs, req.path_globs("Pipfile.lock")),
Get(DigestContents, PathGlobs, req.path_globs("pyproject.toml")),
)

def add_req_targets(files: Iterable[str], alias: str, target_name: str) -> None:
unowned_files = set(files) - set(all_owned_sources)
def add_req_targets(files: Iterable[FileContent], alias: str, target_name: str) -> None:
contents = {i.path: i.content for i in files}
unowned_files = set(contents) - set(all_owned_sources)
for fp in unowned_files:
path, name = os.path.split(fp)

try:
validate(fp, contents[fp], alias)
except Exception as e:
logger.warning(
f"An error occurred when validating `{fp}`: {e}.\n\n"
"You'll need to create targets for its contents manually.\n"
"To silence this error in future, see "
"https://www.pantsbuild.org/docs/reference-tailor#section-ignore-paths \n"
)
continue

pts.append(
PutativeTarget(
path=path,
Expand All @@ -150,10 +169,29 @@ def add_req_targets(files: Iterable[str], alias: str, target_name: str) -> None:
)
)

add_req_targets(all_requirements_files.files, "python_requirements", "reqs")
add_req_targets(all_pipenv_lockfile_files.files, "pipenv_requirements", "pipenv")
def validate(path: str, contents: bytes, alias: str) -> None:
    """Parse `contents` with the parser matching `alias`, raising on invalid input.

    Aliases without a registered validator are accepted without inspection.
    """
    validators = {
        "python_requirements": lambda: validate_python_requirements(path, contents),
        "pipenv_requirements": lambda: validate_pipenv_requirements(contents),
        "poetry_requirements": lambda: validate_poetry_requirements(contents),
    }
    validator = validators.get(alias)
    if validator is not None:
        validator()

def validate_python_requirements(path: str, contents: bytes) -> None:
    """Raise if any line of a requirements.txt-style file fails to parse."""
    # The parser is a lazy generator; materializing it forces every
    # requirement line through validation.
    list(parse_requirements_file(contents.decode(), rel_path=path))

def validate_pipenv_requirements(contents: bytes) -> None:
    """Raise if the Pipfile.lock contents are not parseable."""
    # The parsed requirements are discarded: only parse errors matter here.
    _ = parse_pipenv_requirements(contents)

def validate_poetry_requirements(contents: bytes) -> None:
    """Raise if the pyproject.toml contents are not a valid poetry config."""
    # Placeholder paths suffice: parsing only consumes the decoded TOML text.
    parse_pyproject_toml(PyProjectToml(PurePath(), PurePath(), contents.decode()))

add_req_targets(all_requirements_files, "python_requirements", "reqs")
add_req_targets(all_pipenv_lockfile_files, "pipenv_requirements", "pipenv")
add_req_targets(
{fc.path for fc in all_pyproject_toml_contents if b"[tool.poetry" in fc.content},
{fc for fc in all_pyproject_toml_contents if b"[tool.poetry" in fc.content},
"poetry_requirements",
"poetry",
)
Expand Down
57 changes: 56 additions & 1 deletion src/python/pants/backend/python/goals/tailor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_find_putative_targets(rule_runner: RuleRunner) -> None:
rule_runner.set_options(["--no-python-tailor-ignore-solitary-init-files"])
rule_runner.write_files(
{
"3rdparty/Pipfile.lock": "",
"3rdparty/Pipfile.lock": "{}",
"3rdparty/pyproject.toml": "[tool.poetry]",
"3rdparty/requirements-test.txt": "",
"already_owned/requirements.txt": "",
Expand Down Expand Up @@ -153,6 +153,61 @@ def test_find_putative_targets(rule_runner: RuleRunner) -> None:
)


def test_skip_invalid_requirements(rule_runner: RuleRunner) -> None:
    """Tailor should emit targets only for requirement files that parse cleanly."""
    rule_runner.set_options(["--no-python-tailor-ignore-solitary-init-files"])
    rule_runner.write_files(
        {
            "3rdparty/requirements-valid.txt": b"FooProject >= 1.2",
            "3rdparty/requirements-invalid.txt": b"FooProject LOLOLOLOL 1.2",
            "pipfile-valid/Pipfile.lock": b"{}",
            "pipfile-invalid/Pipfile.lock": b"FNARB",
            "poetry-valid/pyproject.toml": b"[tool.poetry]",
            "poetry-invalid/pyproject.toml": b"FNARB",
        }
    )
    search_dirs = (
        "3rdparty",
        "pipfile-valid",
        "pipfile-invalid",
        "poetry-valid",
        "poetry-invalid",
    )
    pts = rule_runner.request(
        PutativeTargets,
        [PutativePythonTargetsRequest(search_dirs), AllOwnedSources([])],
    )
    # Only the directories whose files validated should produce targets.
    expected = PutativeTargets(
        [
            PutativeTarget.for_target_type(
                PythonRequirementsTargetGenerator,
                path="3rdparty",
                name="reqs",
                triggering_sources=["3rdparty/requirements-valid.txt"],
                kwargs={"source": "requirements-valid.txt"},
            ),
            PutativeTarget.for_target_type(
                PipenvRequirementsTargetGenerator,
                path="pipfile-valid",
                name="pipenv",
                triggering_sources=["pipfile-valid/Pipfile.lock"],
            ),
            PutativeTarget.for_target_type(
                PoetryRequirementsTargetGenerator,
                path="poetry-valid",
                name="poetry",
                triggering_sources=["poetry-valid/pyproject.toml"],
            ),
        ]
    )
    assert pts == expected


def test_find_putative_targets_subset(rule_runner: RuleRunner) -> None:
rule_runner.write_files(
{
Expand Down
34 changes: 21 additions & 13 deletions src/python/pants/backend/python/macros/pipenv_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,11 @@ async def generate_from_pipenv_requirement(
description_of_origin=f"{generator}'s field `{PipenvSourceField.alias}`",
),
)
lock_info = json.loads(digest_contents[0].content)

module_mapping = generator[ModuleMappingField].value
stubs_mapping = generator[TypeStubsModuleMappingField].value

def generate_tgt(raw_req: str, info: dict) -> PythonRequirementTarget:
if info.get("extras"):
raw_req += f"[{','.join(info['extras'])}]"
raw_req += info.get("version", "")
if info.get("markers"):
raw_req += f";{info['markers']}"

parsed_req = PipRequirement.parse(raw_req)
def generate_tgt(parsed_req: PipRequirement) -> PythonRequirementTarget:
normalized_proj_name = canonicalize_project_name(parsed_req.project_name)
tgt_overrides = overrides.pop(normalized_proj_name, {})
if Dependencies.alias in tgt_overrides:
Expand All @@ -135,10 +127,8 @@ def generate_tgt(raw_req: str, info: dict) -> PythonRequirementTarget:
union_membership,
)

result = tuple(
generate_tgt(req, info)
for req, info in {**lock_info.get("default", {}), **lock_info.get("develop", {})}.items()
) + (file_tgt,)
reqs = parse_pipenv_requirements(digest_contents[0].content)
result = tuple(generate_tgt(req) for req in reqs) + (file_tgt,)

if overrides:
raise InvalidFieldException(
Expand All @@ -153,6 +143,24 @@ def generate_tgt(raw_req: str, info: dict) -> PythonRequirementTarget:
return GeneratedTargets(generator, result)


def parse_pipenv_requirements(file_contents: bytes) -> tuple[PipRequirement, ...]:
    """Parse a Pipfile.lock's JSON contents into PipRequirement objects.

    Entries from the "develop" section override same-named entries from
    "default", matching the original dict-expansion merge order. Raises if the
    JSON is malformed or any assembled requirement string fails to parse.
    """
    lock_info = json.loads(file_contents)
    # "develop" wins on duplicate project names because it is expanded last.
    merged = {**lock_info.get("default", {}), **lock_info.get("develop", {})}

    def to_requirement(raw: str, details: dict) -> PipRequirement:
        # Reassemble a PEP 508-style requirement string from the lockfile's
        # structured fields before handing it to the parser.
        extras = details.get("extras")
        if extras:
            raw += f"[{','.join(extras)}]"
        raw += details.get("version", "")
        markers = details.get("markers")
        if markers:
            raw += f";{markers}"
        return PipRequirement.parse(raw)

    return tuple(to_requirement(name, details) for name, details in merged.items())


def rules():
return (
*collect_rules(),
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/python/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,12 +1187,12 @@ def parse_requirements_file(content: str, *, rel_path: str) -> Iterator[PipRequi
This will safely ignore any options starting with `--` and will ignore comments. Any pip-style
VCS requirements will fail, with a helpful error message describing how to use PEP 440.
"""
for i, line in enumerate(content.splitlines()):
for i, line in enumerate(content.splitlines(), start=1):
line, _, _ = line.partition("--")
line = line.strip().rstrip("\\")
if not line or line.startswith(("#", "-")):
continue
yield PipRequirement.parse(line, description_of_origin=f"{rel_path} at line {i + 1}")
yield PipRequirement.parse(line, description_of_origin=f"{rel_path} at line {i}")


# -----------------------------------------------------------------------------------------------
Expand Down

0 comments on commit 5e208ec

Please sign in to comment.