From e6b377da6ebfc06d251fc15b499c01f9c959efa4 Mon Sep 17 00:00:00 2001
From: Jacob Floyd
Date: Wed, 19 Jun 2024 23:34:47 -0500
Subject: [PATCH 01/10] Generate entry_points.txt for python_tests that
 require entry points from a python_distribution (#21062)

This adds a new `entry_point_dependencies` field to `python_tests` and
`python_test` targets. This allows tests to depend on a subset (or all) of
the `entry_points` defined on `python_distribution` targets (only on the
`entry_points` field, not in the `provides` field). For example:

```python
python_tests(
    name="tests",
    entry_point_dependencies={
        "//address/to:python_distro_tgt_1": ["*"],  # all entry points
        # only one group of entry points
        "//address/to:python_distro_tgt_2": ["console_scripts"],
        "//address/to:python_distro_tgt_4": ["st2common.runners.runner"],
        # or multiple groups of entry points
        "//address/to:python_distro_tgt_5": ["console_scripts", "st2common.runners.runner"],
        # or 1+ individual entry points
        "//address/to:python_distro_tgt_6": ["console_scripts/foo-bar"],
        "//address/to:python_distro_tgt_7": ["console_scripts/foo-bar", "console_scripts/baz"],
        "//address/to:python_distro_tgt_8": ["st2common.runners.runner/action-chain", "console_scripts/foo-bar"],
    },
)
```

A dependency defined in `entry_point_dependencies` emulates an editable
install of those `python_distribution` targets. Instead of including all of
the `python_distribution`'s sources, only the specified entry points are made
available. The entry_points metadata is also installed in the pytest sandbox
so that tests (or the code under test) can load that metadata via
`pkg_resources` or `importlib.metadata.entry_points` or similar.

Misc notes:

- I added this to the `pants.backend.experimental.python` backend and also
  registered the rules I extracted from
  `pants.backend.experimental.python.framework.stevedore` with the stevedore
  backend since it needs those rules.
- Unlike all other dependencies fields, `entry_point_dependencies` is a
  `dict[str, list[str]]`. The `.keys()` of the dict are logically similar to
  standard dependencies fields. Using a dict provides more granular control
  over the dependencies.
- Also unlike other dependencies fields, this field does NOT result in a
  dependency on the `python_distribution` target. Instead, that target
  provides the `entry_points` metadata to infer dependencies on the actual
  code for those entry points.
- I extracted most of the logic for this from the experimental stevedore
  plugin. Enabling the stevedore plugin is not required to use this feature.
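For illustration only: with the BUILD file above, a test could then look up
the metadata through the standard library. This sketch assumes Python 3.10+
(where `importlib.metadata.entry_points` accepts a `group=` filter); the
group and entry point names are the ones from the example above:

```python
from importlib.metadata import entry_points


def test_console_script_is_visible():
    # The generated entry_points.txt in the sandbox makes the distribution's
    # entry points discoverable, as if the python_distribution were installed.
    names = {ep.name for ep in entry_points(group="console_scripts")}
    assert "foo-bar" in names
```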
Closes #11386
Closes #15481
---
 docs/notes/2.23.x.md                          |   7 +
 .../python/framework/stevedore/register.py    |   2 +
 .../backend/experimental/python/register.py   |   5 +-
 .../stevedore/python_target_dependencies.py   |  84 +---
 .../python_target_dependencies_test.py        |   2 +
 .../python/framework/stevedore/rules.py       |  82 +---
 .../python/framework/stevedore/rules_test.py  |   2 +
 .../pants/backend/python/target_types.py      |  45 +-
 .../backend/python/target_types_rules.py      |  24 +-
 .../backend/python/util_rules/entry_points.py | 392 ++++++++++++++++++
 .../python/util_rules/entry_points_test.py    | 303 ++++++++++++++
 11 files changed, 792 insertions(+), 156 deletions(-)
 create mode 100644 src/python/pants/backend/python/util_rules/entry_points.py
 create mode 100644 src/python/pants/backend/python/util_rules/entry_points_test.py

diff --git a/docs/notes/2.23.x.md b/docs/notes/2.23.x.md
index 8ecd24852b2..f9e6077b82d 100644
--- a/docs/notes/2.23.x.md
+++ b/docs/notes/2.23.x.md
@@ -80,6 +80,13 @@ The default version of the pex tool has been updated from 2.3.1 to 2.3.3.
 
 Fix running python source files that have dashes in them (bug introduced in 2.20). For example: `pants run path/to/some-executable.py`
 
+A new `entry_point_dependencies` field is now available for `python_tests` and `python_test` targets. This allows tests
+to depend on a subset (or all) of the `entry_points` defined on `python_distribution` targets. A dependency defined in
+`entry_point_dependencies` emulates an editable install of those `python_distribution` targets. Instead of including
+all of the `python_distribution`'s sources, only the specified entry points are made available. The entry_points metadata
+is also installed in the pytest sandbox so that tests (or the code under test) can load that metadata via `pkg_resources`.
+To use this, enable the `pants.backend.experimental.python` backend.
+
 #### Terraform
 
 The `tfsec` linter now works on all supported platforms without extra config.
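For reference, the file materialized into the test sandbox uses the standard
INI-style `entry_points.txt` format. This sample mirrors the contents asserted
in the new `entry_points_test.py` later in this patch, which is written to
`runners/foobar_runner/foobar_runner.egg-info/entry_points.txt`:

```
[console_scripts]
some-thing = foobar_runner.thing:main

[st2common.runners.runner]
foobar = foobar_runner.foobar_runner
```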
diff --git a/src/python/pants/backend/experimental/python/framework/stevedore/register.py b/src/python/pants/backend/experimental/python/framework/stevedore/register.py index bad4823cb55..0847d0f6729 100644 --- a/src/python/pants/backend/experimental/python/framework/stevedore/register.py +++ b/src/python/pants/backend/experimental/python/framework/stevedore/register.py @@ -10,6 +10,7 @@ from pants.backend.python.framework.stevedore import rules as stevedore_rules from pants.backend.python.framework.stevedore.target_types import StevedoreNamespace from pants.backend.python.target_types_rules import rules as python_target_types_rules +from pants.backend.python.util_rules.entry_points import rules as entry_points_rules from pants.build_graph.build_file_aliases import BuildFileAliases @@ -19,6 +20,7 @@ def build_file_aliases(): def rules(): return [ + *entry_points_rules(), *stevedore_rules.rules(), *python_target_dependencies.rules(), *python_target_types_rules(), diff --git a/src/python/pants/backend/experimental/python/register.py b/src/python/pants/backend/experimental/python/register.py index e06536939ad..f6f323d0ad5 100644 --- a/src/python/pants/backend/experimental/python/register.py +++ b/src/python/pants/backend/experimental/python/register.py @@ -4,7 +4,8 @@ from pants.backend.python.goals import debug_goals, publish from pants.backend.python.subsystems import setuptools_scm, twine from pants.backend.python.target_types import VCSVersion -from pants.backend.python.util_rules import pex, vcs_versioning +from pants.backend.python.target_types_rules import rules as target_types_rules +from pants.backend.python.util_rules import entry_points, pex, vcs_versioning def rules(): @@ -15,6 +16,8 @@ def rules(): *setuptools_scm.rules(), *twine.rules(), *debug_goals.rules(), + *target_types_rules(), + *entry_points.rules(), ) diff --git a/src/python/pants/backend/python/framework/stevedore/python_target_dependencies.py b/src/python/pants/backend/python/framework/stevedore/python_target_dependencies.py index 0d64ea18025..cb3a8cfea8d 100644 --- a/src/python/pants/backend/python/framework/stevedore/python_target_dependencies.py +++ b/src/python/pants/backend/python/framework/stevedore/python_target_dependencies.py @@ -7,10 +7,6 @@ from dataclasses import dataclass from typing import Mapping -from pants.backend.python.dependency_inference.module_mapper import ( - PythonModuleOwners, - PythonModuleOwnersRequest, -) from pants.backend.python.framework.stevedore.target_types import ( AllStevedoreExtensionTargets, StevedoreExtensionTargets, @@ -20,20 +16,18 @@ ) from pants.backend.python.target_types import ( PythonDistribution, - PythonDistributionDependenciesField, PythonDistributionEntryPointsField, PythonTestsDependenciesField, PythonTestsGeneratorTarget, PythonTestTarget, - ResolvedPythonDistributionEntryPoints, - ResolvePythonDistributionEntryPointsRequest, ) -from pants.engine.addresses import Address, Addresses -from pants.engine.rules import Get, MultiGet, collect_rules, rule +from pants.backend.python.util_rules.entry_points import ( + EntryPointDependencies, + GetEntryPointDependenciesRequest, +) +from pants.engine.rules import Get, collect_rules, rule from pants.engine.target import ( AllTargets, - DependenciesRequest, - ExplicitlyProvidedDependencies, FieldSet, InferDependenciesRequest, InferredDependencies, @@ -42,8 +36,6 @@ from pants.engine.unions import UnionRule from pants.util.frozendict import FrozenDict from pants.util.logging import LogLevel -from pants.util.ordered_set import 
OrderedSet -from pants.util.strutil import softwrap # ----------------------------------------------------------------------------------------------- # Utility rules to analyze all StevedoreNamespace entry_points @@ -154,65 +146,17 @@ async def infer_stevedore_namespaces_dependencies( StevedoreNamespacesProviderTargetsRequest(requested_namespaces), ) - # This is based on pants.backend.python.target_type_rules.infer_python_distribution_dependencies, - # but handles multiple targets and filters the entry_points to just get the requested deps. - all_explicit_dependencies = await MultiGet( - Get( - ExplicitlyProvidedDependencies, - DependenciesRequest(tgt[PythonDistributionDependenciesField]), - ) - for tgt in targets - ) - all_resolved_entry_points = await MultiGet( - Get( - ResolvedPythonDistributionEntryPoints, - ResolvePythonDistributionEntryPointsRequest(tgt[PythonDistributionEntryPointsField]), - ) - for tgt in targets + requested_namespaces_value = requested_namespaces.value + entry_point_dependencies = await Get( + EntryPointDependencies, + GetEntryPointDependenciesRequest( + targets, + lambda tgt, ns: ns in requested_namespaces_value, + lambda tgt, ns, ep_name: True, + ), ) - all_module_entry_points = [ - (tgt.address, namespace, name, entry_point, explicitly_provided_deps) - for tgt, distribution_entry_points, explicitly_provided_deps in zip( - targets, all_resolved_entry_points, all_explicit_dependencies - ) - for namespace, entry_points in distribution_entry_points.explicit_modules.items() - for name, entry_point in entry_points.items() - ] - all_module_owners = await MultiGet( - Get(PythonModuleOwners, PythonModuleOwnersRequest(entry_point.module, resolve=None)) - for _, _, _, entry_point, _ in all_module_entry_points - ) - module_owners: OrderedSet[Address] = OrderedSet() - for (address, namespace, name, entry_point, explicitly_provided_deps), owners in zip( - all_module_entry_points, all_module_owners - ): - if namespace not in requested_namespaces.value: - continue - - field_str = repr({namespace: {name: entry_point.spec}}) - explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference( - owners.ambiguous, - address, - import_reference="module", - context=softwrap( - f""" - The python_distribution target {address} has the field - `entry_points={field_str}`, which maps to the Python module - `{entry_point.module}` - """ - ), - ) - maybe_disambiguated = explicitly_provided_deps.disambiguated(owners.ambiguous) - unambiguous_owners = owners.unambiguous or ( - (maybe_disambiguated,) if maybe_disambiguated else () - ) - module_owners.update(unambiguous_owners) - - result: tuple[Address, ...] 
= Addresses(module_owners) - for distribution_entry_points in all_resolved_entry_points: - result += distribution_entry_points.pex_binary_addresses - return InferredDependencies(result) + return InferredDependencies(entry_point_dependencies.addresses) def rules(): diff --git a/src/python/pants/backend/python/framework/stevedore/python_target_dependencies_test.py b/src/python/pants/backend/python/framework/stevedore/python_target_dependencies_test.py index 972e55f63b8..3ab6e7dc9d5 100644 --- a/src/python/pants/backend/python/framework/stevedore/python_target_dependencies_test.py +++ b/src/python/pants/backend/python/framework/stevedore/python_target_dependencies_test.py @@ -31,6 +31,7 @@ PythonTestTarget, ) from pants.backend.python.target_types_rules import rules as python_target_types_rules +from pants.backend.python.util_rules.entry_points import rules as entry_points_rules from pants.engine.addresses import Address from pants.engine.target import InferredDependencies from pants.testutil.rule_runner import QueryRule, RuleRunner @@ -74,6 +75,7 @@ def write_test_files(rule_runner: RuleRunner, extra_build_contents: str = ""): def rule_runner() -> RuleRunner: rule_runner = RuleRunner( rules=[ + *entry_points_rules(), *python_target_types_rules(), *stevedore_dep_rules(), QueryRule(AllStevedoreExtensionTargets, ()), diff --git a/src/python/pants/backend/python/framework/stevedore/rules.py b/src/python/pants/backend/python/framework/stevedore/rules.py index dad8e3026e7..e2e131656f0 100644 --- a/src/python/pants/backend/python/framework/stevedore/rules.py +++ b/src/python/pants/backend/python/framework/stevedore/rules.py @@ -3,26 +3,21 @@ from __future__ import annotations -from collections import defaultdict - from pants.backend.python.framework.stevedore.target_types import ( StevedoreExtensionTargets, StevedoreNamespacesField, StevedoreNamespacesProviderTargetsRequest, ) from pants.backend.python.goals.pytest_runner import PytestPluginSetup, PytestPluginSetupRequest -from pants.backend.python.target_types import ( - PythonDistribution, - PythonDistributionEntryPoint, - PythonDistributionEntryPointsField, - ResolvedPythonDistributionEntryPoints, - ResolvePythonDistributionEntryPointsRequest, +from pants.backend.python.target_types import PythonDistribution +from pants.backend.python.util_rules.entry_points import ( + EntryPointsTxt, + GenerateEntryPointsTxtRequest, ) -from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent, PathGlobs, Paths -from pants.engine.rules import Get, MultiGet, collect_rules, rule +from pants.engine.fs import EMPTY_DIGEST +from pants.engine.rules import Get, collect_rules, rule from pants.engine.target import Target from pants.engine.unions import UnionRule -from pants.util.frozendict import FrozenDict from pants.util.logging import LogLevel @@ -52,63 +47,16 @@ async def generate_entry_points_txt_from_stevedore_extension( StevedoreNamespacesProviderTargetsRequest(requested_namespaces), ) - all_resolved_entry_points = await MultiGet( - Get( - ResolvedPythonDistributionEntryPoints, - ResolvePythonDistributionEntryPointsRequest(tgt[PythonDistributionEntryPointsField]), - ) - for tgt in stevedore_targets - ) - - possible_paths = [ - { - f"{tgt.address.spec_path}/{ep.entry_point.module.split('.')[0]}" - for _, entry_points in (resolved_eps.val or {}).items() - for ep in entry_points.values() - } - for tgt, resolved_eps in zip(stevedore_targets, all_resolved_entry_points) - ] - resolved_paths = await MultiGet( - Get(Paths, 
PathGlobs(module_candidate_paths)) for module_candidate_paths in possible_paths + requested_namespaces_value = requested_namespaces.value + entry_points_txt = await Get( + EntryPointsTxt, + GenerateEntryPointsTxtRequest( + stevedore_targets, + lambda tgt, ns: ns in requested_namespaces_value, + lambda tgt, ns, ep_name: True, + ), ) - - # arrange in sibling groups - stevedore_extensions_by_path: dict[ - str, list[ResolvedPythonDistributionEntryPoints] - ] = defaultdict(list) - for resolved_ep, paths in zip(all_resolved_entry_points, resolved_paths): - path = paths.dirs[0] # just take the first match - stevedore_extensions_by_path[path].append(resolved_ep) - - entry_points_txt_files = [] - for module_path, resolved_eps in stevedore_extensions_by_path.items(): - namespace_sections = {} - - for resolved_ep in resolved_eps: - namespace: str - entry_points: FrozenDict[str, PythonDistributionEntryPoint] - for namespace, entry_points in resolved_ep.val.items(): - if not entry_points or namespace not in requested_namespaces.value: - continue - - entry_points_txt_section = f"[{namespace}]\n" - for entry_point_name, ep in sorted(entry_points.items()): - entry_points_txt_section += f"{entry_point_name} = {ep.entry_point.spec}\n" - entry_points_txt_section += "\n" - namespace_sections[namespace] = entry_points_txt_section - - # consistent sorting - entry_points_txt_contents = "".join( - namespace_sections[ns] for ns in sorted(namespace_sections) - ) - - entry_points_txt_path = f"{module_path}.egg-info/entry_points.txt" - entry_points_txt_files.append( - FileContent(entry_points_txt_path, entry_points_txt_contents.encode("utf-8")) - ) - - digest = await Get(Digest, CreateDigest(entry_points_txt_files)) - return PytestPluginSetup(digest) + return PytestPluginSetup(entry_points_txt.digest) def rules(): diff --git a/src/python/pants/backend/python/framework/stevedore/rules_test.py b/src/python/pants/backend/python/framework/stevedore/rules_test.py index 5b6bffff5d8..34e3a366377 100644 --- a/src/python/pants/backend/python/framework/stevedore/rules_test.py +++ b/src/python/pants/backend/python/framework/stevedore/rules_test.py @@ -25,6 +25,7 @@ PythonTestTarget, ) from pants.backend.python.target_types_rules import rules as python_target_types_rules +from pants.backend.python.util_rules.entry_points import rules as entry_points_rules from pants.engine.addresses import Address from pants.engine.fs import EMPTY_DIGEST, CreateDigest, Digest, FileContent from pants.testutil.rule_runner import QueryRule, RuleRunner @@ -37,6 +38,7 @@ def rule_runner() -> RuleRunner: return RuleRunner( rules=[ + *entry_points_rules(), *python_target_types_rules(), *stevedore_dep_rules(), *stevedore_rules(), diff --git a/src/python/pants/backend/python/target_types.py b/src/python/pants/backend/python/target_types.py index 5c3ffe3c954..d9f880fd733 100644 --- a/src/python/pants/backend/python/target_types.py +++ b/src/python/pants/backend/python/target_types.py @@ -940,6 +940,47 @@ class PythonTestsDependenciesField(PythonDependenciesField): supports_transitive_excludes = True +class PythonTestsEntryPointDependenciesField(DictStringToStringSequenceField): + alias = "entry_point_dependencies" + help = help_text( + lambda: f""" + Dependencies on entry point metadata of `{PythonDistribution.alias}` targets. + + This is a dict where each key is a `{PythonDistribution.alias}` address + and the value is a list or tuple of entry point groups and/or entry points + on that target. 
The strings in the value list/tuple must be one of: + - "entry.point.group/entry-point-name" to depend on a named entry point + - "entry.point.group" (without a "/") to depend on an entry point group + - "*" to get all entry points on the target + + For example: + + {PythonTestsEntryPointDependenciesField.alias}={{ + "//foo/address:dist_tgt": ["*"], # all entry points + "bar:dist_tgt": ["console_scripts"], # only from this group + "foo/bar/baz:dist_tgt": ["console_scripts/my-script"], # a single entry point + "another:dist_tgt": [ # multiple entry points + "console_scripts/my-script", + "console_scripts/another-script", + "entry.point.group/entry-point-name", + "other.group", + "gui_scripts", + ], + }} + + Code for matching `entry_points` on `{PythonDistribution.alias}` targets + will be added as dependencies so that they are available on PYTHONPATH + during tests. + + Plus, an `entry_points.txt` file will be generated in the sandbox so that + each of the `{PythonDistribution.alias}`s appear to be "installed". The + `entry_points.txt` file will only include the entry points requested on this + field. This allows the tests, or the code under test, to lookup entry points + metadata using something like `pkg_resources.iter_entry_points` from `setuptools`. + """ + ) + + # TODO This field class should extend from a core `TestTimeoutField` once the deprecated options in `pytest` get removed. class PythonTestsTimeoutField(IntField): alias = "timeout" @@ -1014,6 +1055,8 @@ class SkipPythonTestsField(BoolField): _PYTHON_TEST_MOVED_FIELDS = ( PythonTestsDependenciesField, + # This field is registered in the experimental backend for now. + # PythonTestsEntryPointDependenciesField, PythonResolveField, PythonRunGoalUseSandboxField, PythonTestsTimeoutField, @@ -1641,7 +1684,7 @@ class LongDescriptionPathField(StringField): class PythonDistribution(Target): - alias = "python_distribution" + alias: ClassVar[str] = "python_distribution" core_fields = ( *COMMON_TARGET_FIELDS, InterpreterConstraintsField, diff --git a/src/python/pants/backend/python/target_types_rules.py b/src/python/pants/backend/python/target_types_rules.py index 38cc59ce9af..7aa152763cf 100644 --- a/src/python/pants/backend/python/target_types_rules.py +++ b/src/python/pants/backend/python/target_types_rules.py @@ -45,6 +45,9 @@ ResolvePexEntryPointRequest, ResolvePythonDistributionEntryPointsRequest, ) +from pants.backend.python.util_rules.entry_points import ( + get_python_distribution_entry_point_unambiguous_module_owners, +) from pants.backend.python.util_rules.interpreter_constraints import interpreter_constraints_contains from pants.backend.python.util_rules.package_dists import InvalidEntryPoint from pants.engine.addresses import Address, Addresses, UnparsedAddressInputs @@ -538,24 +541,11 @@ async def infer_python_distribution_dependencies( ) module_owners: OrderedSet[Address] = OrderedSet() for (category, name, entry_point), owners in zip(all_module_entry_points, all_module_owners): - field_str = repr({category: {name: entry_point.spec}}) - explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference( - owners.ambiguous, - address, - import_reference="module", - context=softwrap( - f""" - The python_distribution target {address} has the field - `entry_points={field_str}`, which maps to the Python module - `{entry_point.module}` - """ - ), - ) - maybe_disambiguated = explicitly_provided_deps.disambiguated(owners.ambiguous) - unambiguous_owners = owners.unambiguous or ( - (maybe_disambiguated,) if maybe_disambiguated else 
() + module_owners.update( + get_python_distribution_entry_point_unambiguous_module_owners( + address, category, name, entry_point, explicitly_provided_deps, owners + ) ) - module_owners.update(unambiguous_owners) return InferredDependencies( Addresses(module_owners) diff --git a/src/python/pants/backend/python/util_rules/entry_points.py b/src/python/pants/backend/python/util_rules/entry_points.py new file mode 100644 index 00000000000..b2ae71c4c93 --- /dev/null +++ b/src/python/pants/backend/python/util_rules/entry_points.py @@ -0,0 +1,392 @@ +# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). +from collections import defaultdict +from dataclasses import dataclass +from typing import Callable, Iterable + +from pants.backend.python.dependency_inference.module_mapper import ( + PythonModuleOwners, + PythonModuleOwnersRequest, +) +from pants.backend.python.goals.pytest_runner import PytestPluginSetup, PytestPluginSetupRequest +from pants.backend.python.target_types import ( + EntryPoint, + PythonDistribution, + PythonDistributionDependenciesField, + PythonDistributionEntryPoint, + PythonDistributionEntryPointsField, + PythonTestsDependenciesField, + PythonTestsEntryPointDependenciesField, + PythonTestsGeneratorTarget, + PythonTestTarget, + ResolvedPythonDistributionEntryPoints, + ResolvePythonDistributionEntryPointsRequest, +) +from pants.engine.addresses import Addresses, UnparsedAddressInputs +from pants.engine.fs import CreateDigest, FileContent, PathGlobs, Paths +from pants.engine.internals.native_engine import EMPTY_DIGEST, Address, Digest +from pants.engine.internals.selectors import MultiGet +from pants.engine.rules import Get, collect_rules, rule +from pants.engine.target import ( + DependenciesRequest, + ExplicitlyProvidedDependencies, + FieldSet, + InferDependenciesRequest, + InferredDependencies, + Target, + Targets, +) +from pants.engine.unions import UnionRule +from pants.util.frozendict import FrozenDict +from pants.util.logging import LogLevel +from pants.util.ordered_set import FrozenOrderedSet, OrderedSet +from pants.util.strutil import softwrap + +PythonDistributionEntryPointGroupPredicate = Callable[[Target, str], bool] +PythonDistributionEntryPointPredicate = Callable[[Target, str, str], bool] + + +def get_python_distribution_entry_point_unambiguous_module_owners( + address: Address, + entry_point_group: str, # group is the pypa term; aka category or namespace + entry_point_name: str, + entry_point: EntryPoint, + explicitly_provided_deps: ExplicitlyProvidedDependencies, + owners: PythonModuleOwners, +) -> tuple[Address, ...]: + field_str = repr({entry_point_group: {entry_point_name: entry_point.spec}}) + explicitly_provided_deps.maybe_warn_of_ambiguous_dependency_inference( + owners.ambiguous, + address, + import_reference="module", + context=softwrap( + f""" + The python_distribution target {address} has the field + `entry_points={field_str}`, which maps to the Python module + `{entry_point.module}` + """ + ), + ) + maybe_disambiguated = explicitly_provided_deps.disambiguated(owners.ambiguous) + unambiguous_owners = owners.unambiguous or ( + (maybe_disambiguated,) if maybe_disambiguated else () + ) + return unambiguous_owners + + +@dataclass(frozen=True) +class GetEntryPointDependenciesRequest: + targets: Targets + group_predicate: PythonDistributionEntryPointGroupPredicate + predicate: PythonDistributionEntryPointPredicate + + +@dataclass(frozen=True) +class EntryPointDependencies: + 
addresses: FrozenOrderedSet[Address] + + def __init__(self, addresses: Iterable[Address]) -> None: + object.__setattr__(self, "addresses", FrozenOrderedSet(sorted(addresses))) + + +@rule +async def get_filtered_entry_point_dependencies( + request: GetEntryPointDependenciesRequest, +) -> EntryPointDependencies: + # This is based on pants.backend.python.target_type_rules.infer_python_distribution_dependencies, + # but handles multiple targets and filters the entry_points to just get the requested deps. + all_explicit_dependencies = await MultiGet( + Get( + ExplicitlyProvidedDependencies, + DependenciesRequest(tgt[PythonDistributionDependenciesField]), + ) + for tgt in request.targets + ) + resolved_entry_points = await MultiGet( + Get( + ResolvedPythonDistributionEntryPoints, + ResolvePythonDistributionEntryPointsRequest(tgt[PythonDistributionEntryPointsField]), + ) + for tgt in request.targets + ) + + filtered_entry_point_pex_addresses: list[Address] = [] + filtered_entry_point_modules: list[ + tuple[Address, str, str, EntryPoint, ExplicitlyProvidedDependencies] + ] = [] + + distribution_entry_points: ResolvedPythonDistributionEntryPoints + for tgt, distribution_entry_points, explicitly_provided_deps in zip( + request.targets, resolved_entry_points, all_explicit_dependencies + ): + # use .val instead of .explicit_modules and .pex_binary_addresses to facilitate filtering + for ep_group, entry_points in distribution_entry_points.val.items(): + if not request.group_predicate(tgt, ep_group): + continue + for ep_name, ep_val in entry_points.items(): + if not request.predicate(tgt, ep_group, ep_name): + continue + if ep_val.pex_binary_address: + filtered_entry_point_pex_addresses.append(ep_val.pex_binary_address) + else: + filtered_entry_point_modules.append( + ( + tgt.address, + ep_group, + ep_name, + ep_val.entry_point, + explicitly_provided_deps, + ) + ) + filtered_module_owners = await MultiGet( + Get(PythonModuleOwners, PythonModuleOwnersRequest(entry_point.module, resolve=None)) + for _, _, _, entry_point, _ in filtered_entry_point_modules + ) + + filtered_unambiguous_module_owners: OrderedSet[Address] = OrderedSet() + for (address, ep_group, ep_name, entry_point, explicitly_provided_deps), owners in zip( + filtered_entry_point_modules, filtered_module_owners + ): + filtered_unambiguous_module_owners.update( + get_python_distribution_entry_point_unambiguous_module_owners( + address, ep_group, ep_name, entry_point, explicitly_provided_deps, owners + ) + ) + + return EntryPointDependencies( + Addresses((*filtered_entry_point_pex_addresses, *filtered_unambiguous_module_owners)) + ) + + +async def _get_entry_point_deps_targets_and_predicates( + owning_address: Address, entry_point_deps: PythonTestsEntryPointDependenciesField +) -> tuple[ + Targets, PythonDistributionEntryPointGroupPredicate, PythonDistributionEntryPointPredicate +]: + if not entry_point_deps.value: # type narrowing for mypy + raise ValueError( + "Please file an issue if you see this error. This rule helper must not " + "be called with an empty entry_point_dependencies field." + ) + targets = await Get( + Targets, + UnparsedAddressInputs( + entry_point_deps.value.keys(), + owning_address=owning_address, + description_of_origin=f"{PythonTestsEntryPointDependenciesField.alias} from {owning_address}", + ), + ) + + requested_entry_points: dict[Target, set[str]] = {} + + requested_ep: tuple[str, ...] 
+ for target, requested_ep in zip(targets, entry_point_deps.value.values()): + if not requested_ep: + # requested an empty list, so no entry points were actually requested. + continue + if "*" in requested_ep and len(requested_ep) > 1: + requested_ep = ("*",) + + if not target.has_field(PythonDistributionEntryPointsField): + # unknown target type. ignore + continue + if not target.get(PythonDistributionEntryPointsField).value: + # no entry points can be resolved. + # TODO: Maybe warn that the requested entry points do not exist? + continue + requested_entry_points[target] = set(requested_ep) + + def group_predicate(tgt: Target, ep_group: str) -> bool: + relevant = ("*", ep_group) + for item in sorted(requested_entry_points[tgt]): + if item in relevant or item.startswith(f"{ep_group}/"): + return True + return False + + def predicate(tgt: Target, ep_group: str, ep_name: str) -> bool: + relevant = {"*", ep_group, f"{ep_group}/{ep_name}"} + if relevant & requested_entry_points[tgt]: + # at least one requested entry point is relevant + return True + return False + + return Targets(requested_entry_points.keys()), group_predicate, predicate + + +@dataclass(frozen=True) +class PythonTestsEntryPointDependenciesInferenceFieldSet(FieldSet): + required_fields = ( + PythonTestsDependenciesField, + PythonTestsEntryPointDependenciesField, + ) + entry_point_dependencies: PythonTestsEntryPointDependenciesField + + +class InferEntryPointDependencies(InferDependenciesRequest): + infer_from = PythonTestsEntryPointDependenciesInferenceFieldSet + + +@rule( + desc=f"Infer dependencies based on `{PythonTestsEntryPointDependenciesField.alias}` field.", + level=LogLevel.DEBUG, +) +async def infer_entry_point_dependencies( + request: InferEntryPointDependencies, +) -> InferredDependencies: + entry_point_deps: PythonTestsEntryPointDependenciesField = ( + request.field_set.entry_point_dependencies + ) + if entry_point_deps.value is None: + return InferredDependencies([]) + + dist_targets, group_predicate, predicate = await _get_entry_point_deps_targets_and_predicates( + request.field_set.address, entry_point_deps + ) + + entry_point_dependencies = await Get( + EntryPointDependencies, + GetEntryPointDependenciesRequest(dist_targets, group_predicate, predicate), + ) + return InferredDependencies(entry_point_dependencies.addresses) + + +@dataclass(frozen=True) +class GenerateEntryPointsTxtRequest: + targets: Targets + group_predicate: PythonDistributionEntryPointGroupPredicate + predicate: PythonDistributionEntryPointPredicate + + +@dataclass(frozen=True) +class EntryPointsTxt: + digest: Digest + + +@rule +async def generate_entry_points_txt(request: GenerateEntryPointsTxtRequest) -> EntryPointsTxt: + if not request.targets: + return EntryPointsTxt(EMPTY_DIGEST) + + all_resolved_entry_points = await MultiGet( + Get( + ResolvedPythonDistributionEntryPoints, + ResolvePythonDistributionEntryPointsRequest(tgt[PythonDistributionEntryPointsField]), + ) + for tgt in request.targets + ) + + possible_paths = [ + { + f"{tgt.address.spec_path}/{ep.entry_point.module.split('.')[0]}" + for _, entry_points in (resolved_eps.val or {}).items() + for ep in entry_points.values() + } + for tgt, resolved_eps in zip(request.targets, all_resolved_entry_points) + ] + resolved_paths = await MultiGet( + Get(Paths, PathGlobs(module_candidate_paths)) for module_candidate_paths in possible_paths + ) + + entry_points_by_path: dict[ + str, list[tuple[Target, ResolvedPythonDistributionEntryPoints]] + ] = defaultdict(list) + + target: Target + 
resolved_ep: ResolvedPythonDistributionEntryPoints + paths: Paths + for target, resolved_ep, paths in zip( + request.targets, all_resolved_entry_points, resolved_paths + ): + path = paths.dirs[0] # just take the first match + entry_points_by_path[path].append((target, resolved_ep)) + + entry_points_txt_files = [] + for module_path, target_and_resolved_eps in entry_points_by_path.items(): + group_sections = {} + + for target, resolved_ep in target_and_resolved_eps: + ep_group: str + entry_points: FrozenDict[str, PythonDistributionEntryPoint] + for ep_group, entry_points in resolved_ep.val.items(): + if not entry_points or not request.group_predicate(target, ep_group): + continue + + entry_points_txt_section = f"[{ep_group}]\n" + selected_entry_points_in_group = False + for entry_point_name, ep in sorted(entry_points.items()): + if not request.predicate(target, ep_group, entry_point_name): + continue + selected_entry_points_in_group = True + entry_points_txt_section += f"{entry_point_name} = {ep.entry_point.spec}\n" + if not selected_entry_points_in_group: + continue + entry_points_txt_section += "\n" + group_sections[ep_group] = entry_points_txt_section + + if not group_sections: + continue + + # consistent sorting + entry_points_txt_contents = "".join( + group_sections[ep_group] for ep_group in sorted(group_sections) + ) + + entry_points_txt_path = f"{module_path}.egg-info/entry_points.txt" + entry_points_txt_files.append( + FileContent(entry_points_txt_path, entry_points_txt_contents.encode("utf-8")) + ) + + if not entry_points_txt_files: + digest = EMPTY_DIGEST + else: + digest = await Get(Digest, CreateDigest(entry_points_txt_files)) + return EntryPointsTxt(digest) + + +class GenerateEntryPointsTxtFromEntryPointDependenciesRequest(PytestPluginSetupRequest): + @classmethod + def is_applicable(cls, target: Target) -> bool: + # select python_tests targets with entry_point_dependencies field + return ( + target.has_field(PythonTestsEntryPointDependenciesField) + and target.get(PythonTestsEntryPointDependenciesField).value is not None + ) + + +@rule( + desc=f"Generate entry_points.txt to imitate `{PythonDistribution.alias}` installation.", + level=LogLevel.DEBUG, +) +async def generate_entry_points_txt_from_entry_point_dependencies( + request: GenerateEntryPointsTxtFromEntryPointDependenciesRequest, +) -> PytestPluginSetup: + entry_point_deps = request.target[PythonTestsEntryPointDependenciesField] + if not entry_point_deps.value: + return PytestPluginSetup(EMPTY_DIGEST) + + dist_targets, group_predicate, predicate = await _get_entry_point_deps_targets_and_predicates( + request.target.address, entry_point_deps + ) + + entry_points_txt = await Get( + EntryPointsTxt, + GenerateEntryPointsTxtRequest(dist_targets, group_predicate, predicate), + ) + return PytestPluginSetup(entry_points_txt.digest) + + +def rules(): + return [ + *collect_rules(), + # TODO: remove these register_plugin_field calls once this moves out of experimental + PythonTestTarget.register_plugin_field(PythonTestsEntryPointDependenciesField), + PythonTestsGeneratorTarget.register_plugin_field( + PythonTestsEntryPointDependenciesField, + as_moved_field=True, + ), + UnionRule(InferDependenciesRequest, InferEntryPointDependencies), + UnionRule( + PytestPluginSetupRequest, + GenerateEntryPointsTxtFromEntryPointDependenciesRequest, + ), + ] diff --git a/src/python/pants/backend/python/util_rules/entry_points_test.py b/src/python/pants/backend/python/util_rules/entry_points_test.py new file mode 100644 index 00000000000..959720874c3 
--- /dev/null +++ b/src/python/pants/backend/python/util_rules/entry_points_test.py @@ -0,0 +1,303 @@ +# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import annotations + +from textwrap import dedent +from typing import Iterable + +import pytest + +from pants.backend.python.goals.pytest_runner import PytestPluginSetup +from pants.backend.python.macros.python_artifact import PythonArtifact +from pants.backend.python.target_types import ( + PythonDistribution, + PythonSourcesGeneratorTarget, + PythonSourceTarget, + PythonTestsGeneratorTarget, + PythonTestTarget, +) +from pants.backend.python.target_types_rules import rules as python_target_types_rules +from pants.backend.python.util_rules.entry_points import ( + GenerateEntryPointsTxtFromEntryPointDependenciesRequest, + InferEntryPointDependencies, + PythonTestsEntryPointDependenciesInferenceFieldSet, +) +from pants.backend.python.util_rules.entry_points import rules as entry_points_rules +from pants.engine.addresses import Address +from pants.engine.fs import CreateDigest, DigestContents, FileContent +from pants.engine.internals.native_engine import EMPTY_DIGEST, Digest +from pants.engine.target import InferredDependencies +from pants.testutil.rule_runner import QueryRule, RuleRunner + +# random set of runner names to use in tests +st2_runners = ["noop", "foobar"] + + +def write_test_files(rule_runner: RuleRunner, extra_build_contents: str = ""): + for runner in st2_runners: + rule_runner.write_files( + { + f"runners/{runner}_runner/BUILD": dedent( + f"""\ + python_distribution( + provides=python_artifact( + name="stackstorm-runner-{runner}", + ), + entry_points={{ + "st2common.runners.runner": {{ + "{runner}": "{runner}_runner.{runner}_runner", + }}, + "console_scripts": {{ + "some-thing": "{runner}_runner.thing:main", + }}, + }}, + ) + """ + ) + + extra_build_contents.format(runner=runner), + f"runners/{runner}_runner/{runner}_runner/BUILD": "python_sources()", + f"runners/{runner}_runner/{runner}_runner/__init__.py": "", + f"runners/{runner}_runner/{runner}_runner/{runner}_runner.py": "", + f"runners/{runner}_runner/{runner}_runner/thing.py": dedent( + """\ + def main(): + return True + """ + ), + } + ) + + +@pytest.fixture +def rule_runner() -> RuleRunner: + rule_runner = RuleRunner( + rules=[ + *entry_points_rules(), + *python_target_types_rules(), + QueryRule(InferredDependencies, (InferEntryPointDependencies,)), + QueryRule( + PytestPluginSetup, + (GenerateEntryPointsTxtFromEntryPointDependenciesRequest,), + ), + ], + target_types=[ + PythonDistribution, + PythonSourceTarget, + PythonSourcesGeneratorTarget, + PythonTestTarget, + PythonTestsGeneratorTarget, + ], + objects={ + "python_artifact": PythonArtifact, + }, + ) + write_test_files(rule_runner) + args = [ + "--source-root-patterns=runners/*_runner", + ] + rule_runner.set_options(args, env_inherit={"PATH", "PYENV_ROOT", "HOME"}) + return rule_runner + + +# ----------------------------------------------------------------------------------------------- +# Tests for dependency inference of python targets (python_tests) +# ----------------------------------------------------------------------------------------------- + + +_noop_runner_addresses = ( + Address( + "runners/noop_runner/noop_runner", + relative_file_path="noop_runner.py", + ), + Address( + "runners/noop_runner/noop_runner", + relative_file_path="thing.py", + ), +) + + +@pytest.mark.parametrize( + 
"requested_entry_points, expected_dep_addresses", + ( + # return no entry points + ([], []), + (["undefined.group"], []), + (["console_scripts/undefined-script"], []), + # return one entry point + (["st2common.runners.runner"], [_noop_runner_addresses[0]]), + (["st2common.runners.runner/noop"], [_noop_runner_addresses[0]]), + (["console_scripts"], [_noop_runner_addresses[1]]), + (["console_scripts/some-thing"], [_noop_runner_addresses[1]]), + # return multiple entry points + (["st2common.runners.runner", "console_scripts"], _noop_runner_addresses), + (["st2common.runners.runner", "console_scripts/some-thing"], _noop_runner_addresses), + (["st2common.runners.runner/noop", "console_scripts/some-thing"], _noop_runner_addresses), + (["st2common.runners.runner/noop", "console_scripts"], _noop_runner_addresses), + # return all entry points + (["st2common.runners.runner", "*"], _noop_runner_addresses), + (["*", "gui_scripts"], _noop_runner_addresses), + (["*"], _noop_runner_addresses), + ), +) +def test_infer_entry_point_dependencies( # also tests get_filtered_entry_point_dependencies + rule_runner: RuleRunner, + requested_entry_points: list[str], + expected_dep_addresses: Iterable[Address], +) -> None: + rule_runner.write_files( + { + "src/foobar/BUILD": dedent( + f"""\ + python_tests( + name="tests", + entry_point_dependencies={{ + "//runners/noop_runner": {repr(requested_entry_points)}, + }}, + ) + """ + ), + "src/foobar/test_something.py": "", + } + ) + + def run_dep_inference(address: Address) -> InferredDependencies: + target = rule_runner.get_target(address) + return rule_runner.request( + InferredDependencies, + [ + InferEntryPointDependencies( + PythonTestsEntryPointDependenciesInferenceFieldSet.create(target) + ) + ], + ) + + # This also asserts that these should NOT be inferred dependencies: + # - anything from distributions other than noop_runner + # - the python_distribution itself at Address(f"runners/noop_runner") + assert run_dep_inference( + Address("src/foobar", target_name="tests", relative_file_path="test_something.py"), + ) == InferredDependencies(expected_dep_addresses) + + +# ----------------------------------------------------------------------------------------------- +# Tests for entry_points.txt generation +# ----------------------------------------------------------------------------------------------- + + +# based on get_snapshot from pantsbuild/pants.git/src/python/pants/backend/python/lint/black/rules_integration_test.py +def get_digest(rule_runner: RuleRunner, source_files: dict[str, str]) -> Digest: + files = [FileContent(path, content.encode()) for path, content in source_files.items()] + return rule_runner.request(Digest, [CreateDigest(files)]) + + +def get_digest_contents(rule_runner: RuleRunner, digest: Digest) -> DigestContents: + return rule_runner.request(DigestContents, [digest]) + + +_foobar_entry_points_txt_files = ( + dedent( + """\ + [console_scripts] + some-thing = foobar_runner.thing:main + + """ + ), + dedent( + """\ + [st2common.runners.runner] + foobar = foobar_runner.foobar_runner + + """ + ), + dedent( + """\ + [console_scripts] + some-thing = foobar_runner.thing:main + + [st2common.runners.runner] + foobar = foobar_runner.foobar_runner + + """ + ), +) + + +@pytest.mark.parametrize( + "requested_entry_points, expected_entry_points_txt", + ( + # return no entry points + ([], None), + (["undefined.group"], None), + (["console_scripts/undefined-script"], None), + # return one entry point + (["st2common.runners.runner"], 
_foobar_entry_points_txt_files[1]), + (["st2common.runners.runner/foobar"], _foobar_entry_points_txt_files[1]), + (["console_scripts"], _foobar_entry_points_txt_files[0]), + (["console_scripts/some-thing"], _foobar_entry_points_txt_files[0]), + # return multiple entry points + (["st2common.runners.runner", "console_scripts"], _foobar_entry_points_txt_files[2]), + ( + ["st2common.runners.runner", "console_scripts/some-thing"], + _foobar_entry_points_txt_files[2], + ), + ( + ["st2common.runners.runner/foobar", "console_scripts/some-thing"], + _foobar_entry_points_txt_files[2], + ), + ( + ["st2common.runners.runner/foobar", "console_scripts"], + _foobar_entry_points_txt_files[2], + ), + # return all entry points + (["st2common.runners.runner", "*"], _foobar_entry_points_txt_files[2]), + (["*", "gui_scripts"], _foobar_entry_points_txt_files[2]), + (["*"], _foobar_entry_points_txt_files[2]), + ), +) +def test_generate_entry_points_txt_from_entry_point_dependencies( + rule_runner: RuleRunner, + requested_entry_points: list[str], + expected_entry_points_txt: str | None, +) -> None: + rule_runner.write_files( + { + "src/foobar/BUILD": dedent( + f"""\ + python_tests( + name="tests", + entry_point_dependencies={{ + "//runners/foobar_runner": {repr(requested_entry_points)}, + }}, + ) + """ + ), + "src/foobar/test_something.py": "", + } + ) + + entry_points_txt_path = "runners/foobar_runner/foobar_runner.egg-info/entry_points.txt" + if expected_entry_points_txt is None: + expected_digest = EMPTY_DIGEST + else: + expected_digest = get_digest( + rule_runner, + {entry_points_txt_path: expected_entry_points_txt}, + ) + + target = rule_runner.get_target( + Address("src/foobar", target_name="tests", relative_file_path="test_something.py"), + ) + response = rule_runner.request( + PytestPluginSetup, + [GenerateEntryPointsTxtFromEntryPointDependenciesRequest(target)], + ) + contents = get_digest_contents(rule_runner, response.digest) + if expected_entry_points_txt is None: + assert not contents + else: + assert len(contents) == 1 + assert contents[0].path == entry_points_txt_path + assert contents[0].content == expected_entry_points_txt.encode() + + assert response == PytestPluginSetup(expected_digest) From bcbda76d93b0b6bb798b5a1b7ffa557498465f53 Mon Sep 17 00:00:00 2001 From: Jacob Floyd Date: Thu, 20 Jun 2024 02:04:24 -0500 Subject: [PATCH 02/10] Refactor venv export to use explicit `PythonExecutable` lookup (#21087) Instead of relying on the implicit pex interpreter resolution--based on interpreter_constraints--this PR makes the export pre-calculate which interpreter to use. This has the benefit of allowing pants to provide the python interpreter (eg with pyenv) if configured to do so. Without this, the exports can only use pre-installed interpreters. To make this work, I had to prevent the injection of `PexRequest.interpreter_constraints` when `PexRequest.python` is set. This refactor is pre-work for #20516. --- docs/notes/2.23.x.md | 2 + .../pants/backend/python/goals/export.py | 52 ++++++++----------- .../pants/backend/python/util_rules/pex.py | 2 +- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/docs/notes/2.23.x.md b/docs/notes/2.23.x.md index f9e6077b82d..6895e2cef9a 100644 --- a/docs/notes/2.23.x.md +++ b/docs/notes/2.23.x.md @@ -87,6 +87,8 @@ all of the `python_distribution`'s sources, only the specified entry points are is also installed in the pytest sandbox so that tests (or the code under test) can load that metadata via `pkg_resources`. 
To use this, enable the `pants.backend.experimental.python` backend. +Exported virtualenvs can use Pants-provided Python if a `PythonProvider` backend is enabled (like `pants.backend.python.providers.experimental.pyenv`). Before Pants 2.23, virtualenv exports could only use pre-installed python binaries. + #### Terraform The `tfsec` linter now works on all supported platforms without extra config. diff --git a/src/python/pants/backend/python/goals/export.py b/src/python/pants/backend/python/goals/export.py index a6eeccf4a5b..1a81f07a120 100644 --- a/src/python/pants/backend/python/goals/export.py +++ b/src/python/pants/backend/python/goals/export.py @@ -10,7 +10,7 @@ import uuid from dataclasses import dataclass from enum import Enum -from typing import Any, cast +from typing import cast from pants.backend.python.subsystems.python_tool_base import PythonToolBase from pants.backend.python.subsystems.setup import PythonSetup @@ -20,9 +20,9 @@ EditableLocalDists, EditableLocalDistsRequest, ) -from pants.backend.python.util_rules.pex import Pex, PexProcess, PexRequest, VenvPex, VenvPexProcess +from pants.backend.python.util_rules.pex import Pex, PexRequest, VenvPex from pants.backend.python.util_rules.pex_cli import PexPEX -from pants.backend.python.util_rules.pex_environment import PexEnvironment +from pants.backend.python.util_rules.pex_environment import PexEnvironment, PythonExecutable from pants.backend.python.util_rules.pex_requirements import EntireLockfile, Lockfile from pants.core.goals.export import ( Export, @@ -46,7 +46,7 @@ Snapshot, ) from pants.engine.internals.selectors import Get, MultiGet -from pants.engine.process import ProcessCacheScope, ProcessResult +from pants.engine.process import Process, ProcessCacheScope, ProcessResult from pants.engine.rules import collect_rules, rule from pants.engine.target import AllTargets, HydratedSources, HydrateSourcesRequest, SourcesField from pants.engine.unions import UnionMembership, UnionRule @@ -148,28 +148,20 @@ class ExportPluginOptions: ) -async def _get_full_python_version(pex_or_venv_pex: Pex | VenvPex) -> str: +async def _get_full_python_version(python: PythonExecutable) -> str: # Get the full python version (including patch #). - is_venv_pex = isinstance(pex_or_venv_pex, VenvPex) - kwargs: dict[str, Any] = dict( - description="Get interpreter version", - argv=[ - "-c", - "import sys; print('.'.join(str(x) for x in sys.version_info[0:3]))", - ], - extra_env={"PEX_INTERPRETER": "1"}, - ) - if is_venv_pex: - kwargs["venv_pex"] = pex_or_venv_pex - res = await Get(ProcessResult, VenvPexProcess(**kwargs)) - else: - kwargs["pex"] = pex_or_venv_pex - res = await Get(ProcessResult, PexProcess(**kwargs)) + argv = [ + python.path, + "-c", + "import sys; print('.'.join(str(x) for x in sys.version_info[0:3]))", + ] + res = await Get(ProcessResult, Process(argv, description="Get interpreter version")) return res.stdout.strip().decode() @dataclass(frozen=True) class VenvExportRequest: + py_version: str pex_request: PexRequest dest_prefix: str resolve_name: str @@ -212,13 +204,12 @@ async def do_export( PexRequest, dataclasses.replace(req.pex_request, cache_scope=ProcessCacheScope.PER_SESSION), ) - py_version = await _get_full_python_version(requirements_venv_pex) # Note that for symlinking we ignore qualify_path_with_python_version and always qualify, # since we need some name for the symlink anyway. 
- dest = f"{dest_prefix}/{py_version}" + dest = f"{dest_prefix}/{req.py_version}" description = ( f"symlink to immutable virtualenv for {req.resolve_name or 'requirements'} " - f"(using Python {py_version})" + f"(using Python {req.py_version})" ) venv_abspath = os.path.join(complete_pex_env.pex_root, requirements_venv_pex.venv_rel_dir) return ExportResult( @@ -237,14 +228,13 @@ async def do_export( # See the build_pex() rule and _determine_pex_python_and_platforms() helper in pex.py. requirements_pex = await Get(Pex, PexRequest, req.pex_request) assert requirements_pex.python is not None - py_version = await _get_full_python_version(requirements_pex) if req.qualify_path_with_python_version: - dest = f"{dest_prefix}/{py_version}" + dest = f"{dest_prefix}/{req.py_version}" else: dest = dest_prefix description = ( f"mutable virtualenv for {req.resolve_name or 'requirements'} " - f"(using Python {py_version})" + f"(using Python {req.py_version})" ) merged_digest = await Get(Digest, MergeDigests([pex_pex.digest, requirements_pex.digest])) @@ -252,7 +242,7 @@ async def do_export( tmpdir_under_digest_root = os.path.join("{digest_root}", tmpdir_prefix) merged_digest_under_tmpdir = await Get(Digest, AddPrefix(merged_digest, tmpdir_prefix)) - venv_prompt = f"{req.resolve_name}/{py_version}" if req.resolve_name else py_version + venv_prompt = f"{req.resolve_name}/{req.py_version}" if req.resolve_name else req.py_version pex_args = [ os.path.join(tmpdir_under_digest_root, requirements_pex.name), @@ -286,7 +276,7 @@ async def do_export( # - pkg_name-1.2.3-0.editable-py3-none-any.whl wheels_snapshot = await Get(Snapshot, Digest, req.editable_local_dists_digest) # We need the paths to the installed .dist-info directories to finish installation. - py_major_minor_version = ".".join(py_version.split(".", 2)[:2]) + py_major_minor_version = ".".join(req.py_version.split(".", 2)[:2]) lib_dir = os.path.join( output_path, "lib", f"python{py_major_minor_version}", "site-packages" ) @@ -533,6 +523,9 @@ async def export_virtualenv_for_resolve( ) ) + python = await Get(PythonExecutable, InterpreterConstraints, interpreter_constraints) + py_version = await _get_full_python_version(python) + if resolve in export_subsys.options.py_editable_in_resolve: editable_local_dists = await Get( EditableLocalDists, EditableLocalDistsRequest(resolve=resolve) @@ -547,7 +540,7 @@ async def export_virtualenv_for_resolve( internal_only=True, requirements=EntireLockfile(lockfile), sources=editable_local_dists_digest, - interpreter_constraints=interpreter_constraints, + python=python, # Packed layout should lead to the best performance in this use case. layout=PexLayout.PACKED, ) @@ -556,6 +549,7 @@ async def export_virtualenv_for_resolve( export_result = await Get( ExportResult, VenvExportRequest( + py_version, pex_request, dest_prefix, resolve, diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py index 0a32538e9b2..fc998944924 100644 --- a/src/python/pants/backend/python/util_rules/pex.py +++ b/src/python/pants/backend/python/util_rules/pex.py @@ -646,7 +646,7 @@ async def build_pex( ) -> BuildPexResult: """Returns a PEX with the given settings.""" - if not request.interpreter_constraints: + if not request.python and not request.interpreter_constraints: # Blank ICs in the request means that the caller wants us to use the ICs configured # for the resolve (falling back to the global ICs). 
        resolve_name = ""

From 925cfb60db001d89b537bc5f4801c8b5bfbf2b3c Mon Sep 17 00:00:00 2001
From: Tom Dyas
Date: Thu, 20 Jun 2024 11:11:41 -0400
Subject: [PATCH 03/10] add intrinsic rule to request metadata about paths in
 filesystem (#20996)

Add a new `path_metadata_request` intrinsic rule to allow rule code to
request metadata about paths in the filesystem. The intended uses are to
support:

- Some form of metadata-based invalidation for `adhoc_tool` and
  `shell_command` targets, as explored in the proof of concept in
  https://github.com/pantsbuild/pants/pull/20914. (This PR in fact is split
  out from that other PR.)
- Future potential work to avoid indefinite negative caching of PATH-style
  lookups (which is a problem of the existing BinaryPath lookups) by
  switching to direct monitoring of system-level paths. This API would likely
  be extended to that use case. (But I am not yet committing to any
  particular solution. Still, some form of metadata API will be useful.)

Metadata is represented by the `PathMetadata` dataclass. `PathMetadataRequest`
and `PathMetadataResult` are the input/output types, respectively, for the
intrinsic.

Note: `NodeKey::fs_path_to_watch` is introduced to allow
`NodeKey::PathMetadata` to watch the parent directory even though
`fs_subject` remains its configured path. This is necessary because the watch
code will error with "file not found" if the path to be watched does not
exist. The solution is to watch the parent directory and wait for creation /
removal events related to the `fs_subject`.
---
 docs/notes/2.23.x.md                          |   2 +
 src/python/pants/engine/fs.py                 |  22 +++
 src/python/pants/engine/fs_test.py            |  74 +++++++++-
 .../pants/engine/internals/native_engine.pyi  |  29 ++++
 .../pants/engine/internals/scheduler.py       |   4 +
 src/python/pants/engine/intrinsics.py         |   7 +
 src/rust/engine/fs/src/lib.rs                 | 127 +++++++++++++++++-
 src/rust/engine/src/externs/fs.rs             |  83 ++++++++++++
 src/rust/engine/src/externs/interface.rs      |   4 +
 src/rust/engine/src/intrinsics/digests.rs     |  46 ++++++-
 src/rust/engine/src/nodes/mod.rs              |  44 +++++-
 src/rust/engine/src/nodes/path_metadata.rs    |  60 +++++++++
 src/rust/engine/src/types.rs                  |   2 +
 13 files changed, 497 insertions(+), 7 deletions(-)
 create mode 100644 src/rust/engine/src/nodes/path_metadata.rs

diff --git a/docs/notes/2.23.x.md b/docs/notes/2.23.x.md
index 6895e2cef9a..4bf5272702b 100644
--- a/docs/notes/2.23.x.md
+++ b/docs/notes/2.23.x.md
@@ -128,6 +128,8 @@ Fixed bug where files larger than 512KB were being materialized to a process's s
 
 The "Provided by" information in the documentation now correctly reflects the proper backend to enable to activate a certain feature.
 
+Metadata for paths in the repository can now be requested via the `PathMetadataRequest` and `PathMetadataResult` types. This API is intended for rules which need access to the "full" metadata for a path.
+
 ### New call-by-name syntax for @rules
 
 Pants has a new mechanism for `@rule` invocation in backends. In this release the following backends were migrated to use this new mechanism. There should not be any user-visible effects, but please be on the lookout for any unusual bugs or error messages.
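For rule authors, the new intrinsic is consumed like any other request/result
pair. Below is a condensed sketch based on the new `fs_test.py` in this patch
(in a real `@rule` you would instead
`await Get(PathMetadataResult, PathMetadataRequest(path))`; the file name and
contents here are arbitrary):

```python
from pants.engine.fs import PathMetadataRequest, PathMetadataResult
from pants.engine.internals.native_engine import PathMetadataKind
from pants.testutil.rule_runner import QueryRule, RuleRunner

# Wire up a scheduler that can answer PathMetadataRequest queries.
rule_runner = RuleRunner(
    rules=[QueryRule(PathMetadataResult, [PathMetadataRequest])],
    isolated_local_store=True,
)
rule_runner.write_files({"foo": b"xyzzy"})

result = rule_runner.request(PathMetadataResult, [PathMetadataRequest("foo")])
metadata = result.metadata  # None if the path does not exist
assert metadata is not None
assert metadata.kind == PathMetadataKind.FILE
assert metadata.length == 5  # len(b"xyzzy")
```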
diff --git a/src/python/pants/engine/fs.py b/src/python/pants/engine/fs.py index 95fcaa4ebc0..3ce9f1dbdbf 100644 --- a/src/python/pants/engine/fs.py +++ b/src/python/pants/engine/fs.py @@ -20,6 +20,7 @@ from pants.engine.internals.native_engine import Digest as Digest from pants.engine.internals.native_engine import FileDigest as FileDigest from pants.engine.internals.native_engine import MergeDigests as MergeDigests +from pants.engine.internals.native_engine import PathMetadata from pants.engine.internals.native_engine import RemovePrefix as RemovePrefix from pants.engine.internals.native_engine import Snapshot as Snapshot from pants.engine.rules import QueryRule @@ -330,6 +331,27 @@ def from_snapshots(cls, ours: Snapshot, theirs: Snapshot) -> SnapshotDiff: return cls(*ours._diff(theirs)) +@dataclass(frozen=True) +class PathMetadataRequest: + """Request the full metadata of a single path in the filesystem. + + Note: This API is symlink-aware and will distinguish between symlinks and regular files. + """ + + path: str + + +@dataclass(frozen=True) +class PathMetadataResult: + """Result of requesting the metadata for a path in the filesystem. + + The `metadata` field will contain the metadata for the requested path, or else `None` if the + path does not exist. + """ + + metadata: PathMetadata | None + + def rules(): return ( QueryRule(Digest, (CreateDigest,)), diff --git a/src/python/pants/engine/fs_test.py b/src/python/pants/engine/fs_test.py index 801ab8c1500..928856f276c 100644 --- a/src/python/pants/engine/fs_test.py +++ b/src/python/pants/engine/fs_test.py @@ -15,7 +15,7 @@ from http.server import BaseHTTPRequestHandler from io import BytesIO from pathlib import Path -from typing import Callable, Dict, Iterable, Optional, Set, Union +from typing import Any, Callable, Dict, Iterable, Optional, Set, Union import pytest @@ -38,6 +38,8 @@ MergeDigests, PathGlobs, PathGlobsAndRoot, + PathMetadataRequest, + PathMetadataResult, RemovePrefix, Snapshot, SnapshotDiff, @@ -45,6 +47,7 @@ Workspace, ) from pants.engine.goal import Goal, GoalSubsystem +from pants.engine.internals.native_engine import PathMetadata, PathMetadataKind from pants.engine.internals.scheduler import ExecutionError from pants.engine.rules import Get, goal_rule, rule from pants.testutil.rule_runner import QueryRule, RuleRunner @@ -64,6 +67,7 @@ def rule_runner() -> RuleRunner: QueryRule(Snapshot, [CreateDigest]), QueryRule(Snapshot, [DigestSubset]), QueryRule(Snapshot, [PathGlobs]), + QueryRule(PathMetadataResult, [PathMetadataRequest]), ], isolated_local_store=True, ) @@ -1502,3 +1506,71 @@ def test_snapshot_diff( assert diff.their_unique_files == expected_diff.our_unique_files assert diff.their_unique_dirs == expected_diff.our_unique_dirs assert diff.changed_files == expected_diff.changed_files + + +def retry_failed_assertions( + callable: Callable[[], Any], n: int, sleep_duration: float = 0.05 +) -> None: + """Retry the callable if any assertions failed. + + This is used to handle any failures resulting from an external system not fully processing + certain events as expected. 
+ """ + last_exception: BaseException | None = None + + while n > 0: + try: + callable() + return + except AssertionError as e: + last_exception = e + n -= 1 + time.sleep(sleep_duration) + sleep_duration *= 2 + + assert last_exception is not None + raise last_exception + + +def test_path_metadata_request(rule_runner: RuleRunner) -> None: + rule_runner.write_files( + { + "foo": b"xyzzy", + "sub-dir/bar": b"12345", + } + ) + os.symlink("foo", os.path.join(rule_runner.build_root, "bar")) + + def get_metadata(path: str) -> PathMetadata | None: + result = rule_runner.request(PathMetadataResult, [PathMetadataRequest(path)]) + return result.metadata + + m1 = get_metadata("foo") + assert m1 is not None + assert m1.path == "foo" + assert m1.kind == PathMetadataKind.FILE + assert m1.length == len(b"xyzzy") + assert m1.symlink_target is None + + m2 = get_metadata("not-found") + assert m2 is None + (Path(rule_runner.build_root) / "not-found").write_bytes(b"is found") + + def check_metadata_exists() -> None: + m3 = get_metadata("not-found") + assert m3 is not None + + retry_failed_assertions(check_metadata_exists, 3) + + m4 = get_metadata("bar") + assert m4 is not None + assert m4.path == "bar" + assert m4.kind == PathMetadataKind.SYMLINK + assert m4.length == 3 + assert m4.symlink_target == "foo" + + m5 = get_metadata("sub-dir") + assert m5 is not None + assert m5.path == "sub-dir" + assert m5.kind == PathMetadataKind.DIRECTORY + assert m5.symlink_target is None diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index 53288fe428b..8ac01f10ff5 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -3,6 +3,7 @@ from __future__ import annotations +from datetime import datetime from io import RawIOBase from typing import ( Any, @@ -30,6 +31,8 @@ from pants.engine.fs import ( DigestSubset, NativeDownloadFile, PathGlobs, + PathMetadataRequest, + PathMetadataResult, Paths, ) from pants.engine.internals.docker import DockerResolveImageRequest, DockerResolveImageResult @@ -492,6 +495,31 @@ EMPTY_SNAPSHOT: Snapshot def default_cache_path() -> str: ... +class PathMetadataKind: + FILE: PathMetadataKind = ... + DIRECTORY: PathMetadataKind = ... + SYMLINK: PathMetadataKind = ... + +class PathMetadata: + @property + def path(self) -> str: ... + @property + def kind(self) -> PathMetadataKind: ... + @property + def length(self) -> int: ... + @property + def is_executable(self) -> bool: ... + @property + def unix_mode(self) -> int | None: ... + @property + def accessed(self) -> datetime | None: ... + @property + def created(self) -> datetime | None: ... + @property + def modified(self) -> datetime | None: ... + @property + def symlink_target(self) -> str | None: ... + # ------------------------------------------------------------------------------ # Intrinsics # ------------------------------------------------------------------------------ @@ -530,6 +558,7 @@ async def parse_python_deps( async def parse_javascript_deps( deps_request: NativeDependenciesRequest, ) -> NativeParsedJavascriptDependencies: ... +async def path_metadata_request(request: PathMetadataRequest) -> PathMetadataResult: ... 
# ------------------------------------------------------------------------------ # `pantsd` diff --git a/src/python/pants/engine/internals/scheduler.py b/src/python/pants/engine/internals/scheduler.py index 2b32657d071..9561da54042 100644 --- a/src/python/pants/engine/internals/scheduler.py +++ b/src/python/pants/engine/internals/scheduler.py @@ -28,6 +28,8 @@ NativeDownloadFile, PathGlobs, PathGlobsAndRoot, + PathMetadataRequest, + PathMetadataResult, Paths, Snapshot, SymlinkEntry, @@ -152,6 +154,8 @@ def __init__( # Create the native Scheduler and Session. types = PyTypes( paths=Paths, + path_metadata_request=PathMetadataRequest, + path_metadata_result=PathMetadataResult, file_content=FileContent, file_entry=FileEntry, symlink_entry=SymlinkEntry, diff --git a/src/python/pants/engine/intrinsics.py b/src/python/pants/engine/intrinsics.py index 1e55a743f28..459495a2955 100644 --- a/src/python/pants/engine/intrinsics.py +++ b/src/python/pants/engine/intrinsics.py @@ -13,6 +13,8 @@ MergeDigests, NativeDownloadFile, PathGlobs, + PathMetadataRequest, + PathMetadataResult, Paths, RemovePrefix, Snapshot, @@ -143,6 +145,11 @@ async def parse_javascript_deps( return await native_engine.parse_javascript_deps(deps_request) + +@rule +async def path_metadata_request(request: PathMetadataRequest) -> PathMetadataResult: + return await native_engine.path_metadata_request(request) + + def rules(): return [ *collect_rules(), diff --git a/src/rust/engine/fs/src/lib.rs b/src/rust/engine/fs/src/lib.rs index efa2b0d4f29..69d581dfdbb 100644 --- a/src/rust/engine/fs/src/lib.rs +++ b/src/rust/engine/fs/src/lib.rs @@ -23,11 +23,12 @@ pub use crate::glob_matching::{ }; use std::cmp::min; -use std::io; +use std::io::{self, ErrorKind}; use std::ops::Deref; use std::os::unix::fs::PermissionsExt; use std::path::{Component, Path, PathBuf}; use std::sync::Arc; +use std::time::SystemTime; use std::{fmt, fs}; use async_trait::async_trait; @@ -236,6 +237,45 @@ impl PathStat { } } +/// The kind of path (e.g., file, directory, symlink) as identified in `PathMetadata` +#[derive(Clone, Copy, Debug, DeepSizeOf, Eq, PartialEq)] +pub enum PathMetadataKind { + File, + Directory, + Symlink, +} + +/// Expanded version of `Stat` when access to additional filesystem attributes is necessary. +#[derive(Clone, Debug, DeepSizeOf, Eq, PartialEq)] +pub struct PathMetadata { + /// Path to this filesystem entry. + pub path: PathBuf, + + /// The kind of file at the path. + pub kind: PathMetadataKind, + + /// Length of the filesystem entry. + pub length: u64, + + /// True if the entry is marked executable. + pub is_executable: bool, + + /// UNIX mode (if available) + pub unix_mode: Option<u32>, + + /// Access time of the path (if available). + pub accessed: Option<SystemTime>, + + /// Creation time of the path (if available). + pub created: Option<SystemTime>, + + /// Modification time of the path (if available).
+ pub modified: Option<SystemTime>, + + /// Symlink target + pub symlink_target: Option<PathBuf>, +} + #[derive(Debug, DeepSizeOf, Eq, PartialEq)] pub struct DirectoryListing(pub Vec<Stat>); @@ -559,6 +599,43 @@ impl PosixFS { _ => Err(err), }) } + + pub async fn path_metadata(&self, path: PathBuf) -> Result<Option<PathMetadata>, io::Error> { + let abs_path = self.root.0.join(&path); + match tokio::fs::symlink_metadata(&abs_path).await { + Ok(metadata) => { + let (kind, symlink_target) = match metadata.file_type() { + ft if ft.is_symlink() => { + let symlink_target = tokio::fs::read_link(&abs_path).await.map_err(|e| io::Error::other(format!("path {abs_path:?} was previously a symlink but read_link failed: {e}")))?; + (PathMetadataKind::Symlink, Some(symlink_target)) + } + ft if ft.is_dir() => (PathMetadataKind::Directory, None), + ft if ft.is_file() => (PathMetadataKind::File, None), + _ => unreachable!("std::fs::FileType was not a symlink, directory, or file"), + }; + + #[cfg(target_family = "unix")] + let (unix_mode, is_executable) = { + let mode = metadata.permissions().mode(); + (Some(mode), (mode & 0o111) != 0) + }; + + Ok(Some(PathMetadata { + path, + kind, + length: metadata.len(), + is_executable, + unix_mode, + accessed: metadata.accessed().ok(), + created: metadata.created().ok(), + modified: metadata.modified().ok(), + symlink_target, + })) + } + Err(err) if err.kind() == ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } + } } #[async_trait] @@ -571,6 +648,10 @@ impl Vfs<io::Error> for Arc<PosixFS> { Ok(Arc::new(PosixFS::scandir(self, dir).await?)) } + async fn path_metadata(&self, path: PathBuf) -> Result<Option<PathMetadata>, io::Error> { + PosixFS::path_metadata(self, path).await + } + fn is_ignored(&self, stat: &Stat) -> bool { PosixFS::is_ignored(self, stat) } @@ -648,6 +729,49 @@ impl Vfs<String> for DigestTrie { ))) } + async fn path_metadata(&self, path: PathBuf) -> Result<Option<PathMetadata>, String> { + let entry = match self.entry(&path)?
{ + Some(e) => e, + None => return Ok(None), + }; + + Ok(Some(match entry { + directory::Entry::File(f) => PathMetadata { + path, + kind: PathMetadataKind::File, + length: entry.digest().size_bytes as u64, + is_executable: f.is_executable(), + unix_mode: None, + accessed: None, + created: None, + modified: None, + symlink_target: None, + }, + directory::Entry::Symlink(s) => PathMetadata { + path, + kind: PathMetadataKind::Symlink, + length: 0, + is_executable: false, + unix_mode: None, + accessed: None, + created: None, + modified: None, + symlink_target: Some(s.target().to_path_buf()), + }, + directory::Entry::Directory(_) => PathMetadata { + path, + kind: PathMetadataKind::Directory, + length: entry.digest().size_bytes as u64, + is_executable: false, + unix_mode: None, + accessed: None, + created: None, + modified: None, + symlink_target: None, + }, + })) + } + fn is_ignored(&self, _stat: &Stat) -> bool { false } @@ -664,6 +788,7 @@ impl Vfs<String> for DigestTrie { pub trait Vfs<E: Send + Sync + 'static>: Clone + Send + Sync + 'static { async fn read_link(&self, link: &Link) -> Result<PathBuf, E>; async fn scandir(&self, dir: Dir) -> Result<Arc<DirectoryListing>, E>; + async fn path_metadata(&self, path: PathBuf) -> Result<Option<PathMetadata>, E>; fn is_ignored(&self, stat: &Stat) -> bool; fn mk_error(msg: &str) -> E; } diff --git a/src/rust/engine/src/externs/fs.rs b/src/rust/engine/src/externs/fs.rs index aadf3a96854..5efaa1738e3 100644 --- a/src/rust/engine/src/externs/fs.rs +++ b/src/rust/engine/src/externs/fs.rs @@ -5,6 +5,7 @@ use std::collections::hash_map::DefaultHasher; use std::fmt; use std::hash::{Hash, Hasher}; use std::path::{Path, PathBuf}; +use std::time::SystemTime; use itertools::Itertools; use pyo3::basic::CompareOp; @@ -29,6 +30,8 @@ pub(crate) fn register(m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::<PyPathMetadata>()?; + m.add_class::<PyPathMetadataKind>()?; m.add("EMPTY_DIGEST", PyDigest(EMPTY_DIRECTORY_DIGEST.clone()))?; m.add("EMPTY_FILE_DIGEST", PyFileDigest(EMPTY_DIGEST))?; @@ -469,6 +472,86 @@ impl PyFilespecMatcher { } } +// ----------------------------------------------------------------------------- +// Path Metadata +// ----------------------------------------------------------------------------- + +/// The kind of path (e.g., file, directory, symlink) as identified in `PathMetadata` +#[pyclass(name = "PathMetadataKind", rename_all = "UPPERCASE")] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum PyPathMetadataKind { + File, + Directory, + Symlink, +} + +impl From<fs::PathMetadataKind> for PyPathMetadataKind { + fn from(value: fs::PathMetadataKind) -> Self { + match value { + fs::PathMetadataKind::File => PyPathMetadataKind::File, + fs::PathMetadataKind::Directory => PyPathMetadataKind::Directory, + fs::PathMetadataKind::Symlink => PyPathMetadataKind::Symlink, + } + } +} + +/// Expanded version of `Stat` when access to additional filesystem attributes is necessary.
+#[pyclass(name = "PathMetadata")] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PyPathMetadata(pub fs::PathMetadata); + +#[pymethods] +impl PyPathMetadata { + #[getter] + pub fn path(&self) -> PyResult<PathBuf> { + Ok(self.0.path.clone()) + } + + #[getter] + pub fn kind(&self) -> PyResult<PyPathMetadataKind> { + Ok(PyPathMetadataKind::from(self.0.kind)) + } + + #[getter] + pub fn length(&self) -> PyResult<u64> { + Ok(self.0.length) + } + + #[getter] + pub fn is_executable(&self) -> PyResult<bool> { + Ok(self.0.is_executable) + } + + #[getter] + pub fn unix_mode(&self) -> PyResult<Option<u32>> { + Ok(self.0.unix_mode) + } + + #[getter] + pub fn accessed(&self) -> PyResult<Option<SystemTime>> { + Ok(self.0.accessed) + } + + #[getter] + pub fn created(&self) -> PyResult<Option<SystemTime>> { + Ok(self.0.created) + } + + #[getter] + pub fn modified(&self) -> PyResult<Option<SystemTime>> { + Ok(self.0.modified) + } + + #[getter] + pub fn symlink_target(&self) -> PyResult<Option<PathBuf>> { + Ok(self.0.symlink_target.clone()) + } + + fn __repr__(&self) -> String { + format!("{:?}", self.0) + } +} + // ----------------------------------------------------------------------------- // Utils // ----------------------------------------------------------------------------- diff --git a/src/rust/engine/src/externs/interface.rs b/src/rust/engine/src/externs/interface.rs index 161134494fe..51ac44aa466 100644 --- a/src/rust/engine/src/externs/interface.rs +++ b/src/rust/engine/src/externs/interface.rs @@ -180,6 +180,8 @@ impl PyTypes { #[new] fn __new__( paths: &PyType, + path_metadata_request: &PyType, + path_metadata_result: &PyType, file_content: &PyType, file_entry: &PyType, symlink_entry: &PyType, @@ -211,6 +213,8 @@ impl PyTypes { file_digest: TypeId::new(py.get_type::<PyFileDigest>()), snapshot: TypeId::new(py.get_type::<PySnapshot>()), paths: TypeId::new(paths), + path_metadata_request: TypeId::new(path_metadata_request), + path_metadata_result: TypeId::new(path_metadata_result), file_content: TypeId::new(file_content), file_entry: TypeId::new(file_entry), symlink_entry: TypeId::new(symlink_entry), diff --git a/src/rust/engine/src/intrinsics/digests.rs b/src/rust/engine/src/intrinsics/digests.rs index 10191e08c35..bb573866652 100644 --- a/src/rust/engine/src/intrinsics/digests.rs +++ b/src/rust/engine/src/intrinsics/digests.rs @@ -3,20 +3,25 @@ use std::collections::HashMap; use std::path::PathBuf; +use std::str::FromStr; use fs::{ DigestTrie, DirectoryDigest, GlobMatching, PathStat, RelativePath, SymlinkBehavior, TypedPath, }; use hashing::{Digest, EMPTY_DIGEST}; use pyo3::prelude::{pyfunction, wrap_pyfunction, PyModule, PyRef, PyResult, Python}; +use pyo3::types::PyTuple; +use pyo3::IntoPy; use store::{SnapshotOps, SubsetParams}; use crate::externs; -use crate::externs::fs::{PyAddPrefix, PyFileDigest, PyMergeDigests, PyRemovePrefix}; +use crate::externs::fs::{ + PyAddPrefix, PyFileDigest, PyMergeDigests, PyPathMetadata, PyRemovePrefix, +}; use crate::externs::PyGeneratorResponseNativeCall; use crate::nodes::{ lift_directory_digest, task_get_context, unmatched_globs_additional_context, DownloadedFile, - NodeResult, Snapshot, + NodeResult, PathMetadataNode, Snapshot, }; use crate::python::{throw, Key, Value}; use crate::Failure; @@ -33,6 +38,7 @@ pub fn register(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(path_globs_to_digest, m)?)?; m.add_function(wrap_pyfunction!(path_globs_to_paths, m)?)?; m.add_function(wrap_pyfunction!(remove_prefix_request_to_digest, m)?)?; + m.add_function(wrap_pyfunction!(path_metadata_request, m)?)?; Ok(()) } @@ -354,3 +360,39 @@ fn digest_subset_to_digest(digest_subset: Value) ->
PyGeneratorResponseNativeCall })?) }) } + +#[pyfunction] +fn path_metadata_request(single_path: Value) -> PyGeneratorResponseNativeCall { + PyGeneratorResponseNativeCall::new(async move { + let path = Python::with_gil(|py| { + let arg = (*single_path).as_ref(py); + externs::getattr_as_optional_string(arg, "path") + .map_err(|e| format!("Failed to get `path` for field: {e}")) + })? + .expect("path field for intrinsic"); + + let context = task_get_context(); + let metadata_opt = context + .get(PathMetadataNode::new(PathBuf::from_str(&path).unwrap())) + .await? + .map(PyPathMetadata); + + Ok(Python::with_gil(|py| { + let path_metadata_opt = match metadata_opt { + Some(m) => m.into_py(py), + None => py.None(), + }; + + let py_type = context.core.types.path_metadata_result.as_py_type(py); + let args_tuple = PyTuple::new_bound(py, &[path_metadata_opt]); + let res = py_type.call1(args_tuple).unwrap_or_else(|e| { + panic!( + "Core type constructor `{}` failed: {:?}", + py_type.name().unwrap(), + e + ); + }); + Value::new(res.into_py(py)) + })) + }) +} diff --git a/src/rust/engine/src/nodes/mod.rs b/src/rust/engine/src/nodes/mod.rs index fd0c81b7ecc..4c3fb1cd777 100644 --- a/src/rust/engine/src/nodes/mod.rs +++ b/src/rust/engine/src/nodes/mod.rs @@ -9,7 +9,7 @@ use std::sync::Arc; use async_trait::async_trait; use deepsize::DeepSizeOf; -use fs::{self, Dir, DirectoryDigest, DirectoryListing, File, Link, Vfs}; +use fs::{self, Dir, DirectoryDigest, DirectoryListing, File, Link, PathMetadata, Vfs}; use futures::future::{self, BoxFuture, FutureExt, TryFutureExt}; use graph::{Node, NodeError}; use internment::Intern; @@ -29,6 +29,7 @@ use crate::tasks::Rule; mod digest_file; mod downloaded_file; mod execute_process; +mod path_metadata; mod read_link; mod root; mod run_id; @@ -41,6 +42,7 @@ mod task; pub use self::digest_file::DigestFile; pub use self::downloaded_file::DownloadedFile; pub use self::execute_process::{ExecuteProcess, ProcessResult}; +pub use self::path_metadata::PathMetadata as PathMetadataNode; pub use self::read_link::{LinkDest, ReadLink}; pub use self::root::Root; pub use self::run_id::RunId; @@ -98,6 +100,10 @@ impl Vfs<Failure> for Context { self.get(Scandir(dir)).await } + async fn path_metadata(&self, path: PathBuf) -> Result<Option<PathMetadata>, Failure> { + self.get(PathMetadataNode::new(path)).await + } + fn is_ignored(&self, stat: &fs::Stat) -> bool { self.core.vfs.is_ignored(stat) } @@ -226,6 +232,7 @@ pub enum NodeKey { ExecuteProcess(Box<ExecuteProcess>), ReadLink(ReadLink), Scandir(Scandir), + PathMetadata(PathMetadataNode), Root(Box<Root>), Snapshot(Snapshot), SessionValues(SessionValues), @@ -234,11 +241,16 @@ } impl NodeKey { + /// Returns the filesystem path (if any) in which this node is interested. Any changes to that path + /// will invalidate the applicable graph node. + /// + /// See `fs_path_to_watch` for where the filesystem watch is actually placed. pub fn fs_subject(&self) -> Option<&Path> { match self { NodeKey::DigestFile(s) => Some(s.0.path.as_path()), NodeKey::ReadLink(s) => Some((s.0).path.as_path()), NodeKey::Scandir(s) => Some((s.0).0.as_path()), + NodeKey::PathMetadata(fs) => Some(fs.path()), // Not FS operations: // Explicitly listed so that if people add new NodeKeys they need to consider whether their @@ -254,6 +266,22 @@ } } + /// Returns the filesystem path where the inotify watch should be attached. + /// + /// This is not the same as `fs_subject`.
The path on which to place the filesystem + /// watch can differ from the path which actually invalidates this node because, in the existing inotify logic, + /// trying to place a watch on a path that does not exist fails with a "file not found" error. + pub fn fs_path_to_watch(&self) -> Option<&Path> { + match self { + // For `PathMetadata`, watch the parent directory so that nonexistence of the path can be monitored, + // since creation/deletion events occur on the parent directory. + NodeKey::PathMetadata(fs) => Some(fs.path().parent().unwrap_or_else(|| Path::new("."))), + + // For all other node types, attach the watch to the actual path since the path is assumed or known to exist. + _ => self.fs_subject(), + } + } + fn workunit_level(&self) -> Level { match self { NodeKey::Task(ref task) => task.task.display_info.level, @@ -281,6 +309,7 @@ impl NodeKey { NodeKey::DownloadedFile(..) => "downloaded_file", NodeKey::ReadLink(..) => "read_link", NodeKey::Scandir(..) => "scandir", + NodeKey::PathMetadata(..) => "path_metadata", NodeKey::Root(..) => "root", NodeKey::SessionValues(..) => "session_values", NodeKey::RunId(..) => "run_id", @@ -331,6 +360,7 @@ impl NodeKey { NodeKey::Scandir(Scandir(Dir(path))) => { Some(format!("Reading directory: {}", path.display())) } + NodeKey::PathMetadata(fs) => Some(format!("Checking path: {}", fs.path().display())), NodeKey::DownloadedFile(..) | NodeKey::Root(..) | NodeKey::SessionValues(..) @@ -339,7 +369,7 @@ } async fn maybe_watch(&self, context: &Context) -> NodeResult<()> { - if let Some((path, watcher)) = self.fs_subject().zip(context.core.watcher.as_ref()) { + if let Some((path, watcher)) = self.fs_path_to_watch().zip(context.core.watcher.as_ref()) { let abs_path = context.core.build_root.join(path); watcher .watch(abs_path) @@ -419,6 +449,9 @@ impl Node for NodeKey { NodeKey::Scandir(n) => { n.run_node(context).await.map(NodeOutput::DirectoryListing) } + NodeKey::PathMetadata(n) => { + n.run_node(context).await.map(NodeOutput::PathMetadata) + } NodeKey::Root(n) => n.run_node(context).await.map(NodeOutput::Value), NodeKey::Snapshot(n) => n.run_node(context).await.map(NodeOutput::Snapshot), NodeKey::SessionValues(n) => n.run_node(context).await.map(NodeOutput::Value), @@ -521,6 +554,7 @@ impl Display for NodeKey { } NodeKey::ReadLink(s) => write!(f, "ReadLink({})", (s.0).path.display()), NodeKey::Scandir(s) => write!(f, "Scandir({})", (s.0).0.display()), + NodeKey::PathMetadata(s) => write!(f, "PathMetadata({})", s.path().display()), NodeKey::Root(s) => write!(f, "{}", s.product), NodeKey::Task(task) => { let params = { @@ -565,6 +599,7 @@ pub enum NodeOutput { Snapshot(store::Snapshot), DirectoryListing(Arc<DirectoryListing>), LinkDest(LinkDest), + PathMetadata(Option<PathMetadata>), ProcessResult(Box<ProcessResult>), Value(Value), } @@ -586,7 +621,10 @@ impl NodeOutput { digests.push(p.result.stderr_digest); digests } - NodeOutput::DirectoryListing(_) | NodeOutput::LinkDest(_) | NodeOutput::Value(_) => { + NodeOutput::DirectoryListing(_) + | NodeOutput::LinkDest(_) + | NodeOutput::Value(_) + | NodeOutput::PathMetadata(_) => { vec![] } } diff --git a/src/rust/engine/src/nodes/path_metadata.rs b/src/rust/engine/src/nodes/path_metadata.rs new file mode 100644 index 00000000000..d3f15e50afc --- /dev/null +++ b/src/rust/engine/src/nodes/path_metadata.rs @@ -0,0 +1,60 @@ +// Copyright 2024 Pants project contributors (see CONTRIBUTORS.md). +// Licensed under the Apache License, Version 2.0 (see LICENSE).
+ +use std::path::{Path, PathBuf}; + +use deepsize::DeepSizeOf; +use graph::CompoundNode; + +use super::{NodeKey, NodeOutput, NodeResult}; +use crate::context::Context; +use crate::python::throw; + +/// +/// A `Node` that represents reading the filesystem metadata of a path. +/// +#[derive(Clone, Debug, DeepSizeOf, Eq, Hash, PartialEq)] +pub struct PathMetadata { + path: PathBuf, +} + +impl PathMetadata { + pub fn new(path: PathBuf) -> Self { + Self { path } + } + + pub fn path(&self) -> &Path { + &self.path + } + + pub(super) async fn run_node(self, context: Context) -> NodeResult<Option<fs::PathMetadata>> { + let node = self; + context + .core + .vfs + .path_metadata(node.path.clone()) + .await + .map_err(|e| throw(format!("{e}"))) + } +} + +impl CompoundNode<NodeKey> for PathMetadata { + type Item = Option<fs::PathMetadata>; +} + +impl From<PathMetadata> for NodeKey { + fn from(fs: PathMetadata) -> Self { + NodeKey::PathMetadata(fs) + } +} + +impl TryFrom<NodeOutput> for Option<fs::PathMetadata> { + type Error = (); + + fn try_from(nr: NodeOutput) -> Result<Self, Self::Error> { + match nr { + NodeOutput::PathMetadata(v) => Ok(v), + _ => Err(()), + } + } +} diff --git a/src/rust/engine/src/types.rs b/src/rust/engine/src/types.rs index edffd430c1a..69e3065d2da 100644 --- a/src/rust/engine/src/types.rs +++ b/src/rust/engine/src/types.rs @@ -8,6 +8,8 @@ pub struct Types { pub file_digest: TypeId, pub snapshot: TypeId, pub paths: TypeId, + pub path_metadata_request: TypeId, + pub path_metadata_result: TypeId, pub file_content: TypeId, pub file_entry: TypeId, pub symlink_entry: TypeId, From ad47fe4b629d84ec305214c4dff7d7a6077c49ce Mon Sep 17 00:00:00 2001 From: Alexey Tereshenkov <50622389+AlexTereshenkov@users.noreply.github.com> Date: Thu, 20 Jun 2024 22:15:59 +0100 Subject: [PATCH 04/10] docs: there may be multiple BUILD files in a directory (#20818) --- .../key-concepts/targets-and-build-files.mdx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/docs/using-pants/key-concepts/targets-and-build-files.mdx b/docs/docs/using-pants/key-concepts/targets-and-build-files.mdx index 78b7e0ca30c..835defd5be1 100644 --- a/docs/docs/using-pants/key-concepts/targets-and-build-files.mdx +++ b/docs/docs/using-pants/key-concepts/targets-and-build-files.mdx @@ -60,6 +60,21 @@ python_distribution( ) ``` +### Multiple BUILD files in a directory + +Typically, there is one BUILD file in every directory containing source code and any other resources you +may want to use as part of your builds, and a single BUILD file per directory is usually all you need. +However, you can have multiple BUILD files in a single directory, if desired. When running a Pants goal, the contents +of the BUILD files are merged, making it possible to group your targets more flexibly. + +Storing targets in multiple BUILD files also makes it possible to dynamically include or exclude targets from your +builds. For example, you could include some experimental targets when running a Pants goal from the command line by +extending the list of recognized BUILD file patterns: + +```bash +$ pants --build-patterns="+['BUILD.experimental']" package project:app +``` + ## Target addresses A target is identified by its unique address, in the form `path/to/dir:name`. The above example has the addresses `helloworld/greet:tests` and `helloworld/greet:bin`.
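To make the merging behavior concrete, here is a sketch (hypothetical target names; not from the docs change above) of what two such files might contain. Pants treats the union of their contents as the directory's target set:

```python
# BUILD — the always-present targets for this directory.
python_sources(name="lib")
```

```python
# BUILD.experimental — kept in a separate file so it can be toggled via
# the `--build-patterns` option, as shown in the docs example above.
python_tests(name="experimental-tests")
```

Both `path/to/dir:lib` and `path/to/dir:experimental-tests` are then addressable as if they were defined in a single BUILD file.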
From 87bdf3a24ac3d02916424066bf9b9b9ece0eadfb Mon Sep 17 00:00:00 2001 From: Tobias Nilsson Date: Sat, 22 Jun 2024 06:29:23 +0200 Subject: [PATCH 05/10] Always capture a `corepack` installation and support omitting version strings (#21021) When a user _just_ provided ```toml [nodejs] package_manager = 'pnpm' ``` without specifying which version to use, `pants` would invoke `corepack` with illegal arguments. The intention was to support this mode, and defer the decision of what version of `pnpm` the user gets to `corepack`, as it curates releases of package managers. This is now achieved by passing the `--all` flag for these cases. Additionally, when the package manager is configured via a `package.json` file not located at the build root, `corepack` outputs would not be captured, resulting in failures because corepack would attempt to download the package managers into what was by then an immutable digest. This is resolved by removing the `working_directory` and input digest logic from the invocation. Since versions are parsed beforehand, there is never a case where `corepack` reads a `package.json` whose contents pants has not already parsed. Fixes https://github.com/pantsbuild/pants/issues/21020 --------- Co-authored-by: Huon Wilson --- docs/notes/2.23.x.md | 8 ++ .../backend/javascript/subsystems/nodejs.py | 15 +-- .../javascript/subsystems/nodejs_test.py | 34 ++++- .../javascript/subsystems/nodejs_tool_test.py | 117 ++++++++++++++++-- 4 files changed, 153 insertions(+), 21 deletions(-) diff --git a/docs/notes/2.23.x.md b/docs/notes/2.23.x.md index 4bf5272702b..44ff83a6190 100644 --- a/docs/notes/2.23.x.md +++ b/docs/notes/2.23.x.md @@ -100,6 +100,14 @@ Nodejs processes configured with `extra_env_vars`, e.g. now supports extending the `PATH` variable of such processes. Passing `extra_env_vars=["PATH=/usr/bin"]` was previously silently ignored. +Two issues with pants `corepack` integration have been resolved: +1. The `"packageManager"` package.json field is now respected for package.json files other than the one at the build root. +Previously, if for example a nodejs tool was configured with a resolve based off of such a package.json, the bug caused +pants to invoke `corepack`'s default versions of the package managers instead. +2. The pants.toml option `[nodejs].package_manager` can now be assigned any of the supported package managers +(npm, pnpm, yarn) without providing a corresponding `[nodejs].package_managers` version setting. The version is then +entirely handled by `corepack`. Previously this mode caused pants to fail. #### Shell The `tailor` goal now has independent options for tailoring `shell_sources` and `shunit2_tests` targets. The option was split from `tailor` into [`tailor_sources`](https://www.pantsbuild.org/2.22/reference/subsystems/shell-setup#tailor_sources) and [`tailor_shunit2_tests`](https://www.pantsbuild.org/2.22/reference/subsystems/shell-setup#tailor_shunit2_tests).
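To make the behavior change concrete before the diff: a minimal plain-Python sketch (not pants code; it just mirrors the `filter(None, ...)` expression in `prepare_corepack_tool` below) of the argv now produced for the pinned and unpinned cases:

```python
from __future__ import annotations


def corepack_argv(tool: str, version: str | None) -> tuple[str, ...]:
    # With a pinned version, prepare exactly `tool@version`; with no version,
    # pass `--all` so corepack activates its own curated default releases.
    tool_spec = f"{tool}@{version}" if version else tool
    return tuple(
        filter(None, ("corepack", "prepare", tool_spec if version else "--all", "--activate"))
    )


assert corepack_argv("pnpm", "8.6.0") == ("corepack", "prepare", "pnpm@8.6.0", "--activate")
assert corepack_argv("pnpm", None) == ("corepack", "prepare", "--all", "--activate")
```

Previously the unpinned case produced `("corepack", "prepare", "--activate")`, which `corepack` treated as illegal arguments, per the description above.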
diff --git a/src/python/pants/backend/javascript/subsystems/nodejs.py b/src/python/pants/backend/javascript/subsystems/nodejs.py index c1128bfd3bc..5521e17247c 100644 --- a/src/python/pants/backend/javascript/subsystems/nodejs.py +++ b/src/python/pants/backend/javascript/subsystems/nodejs.py @@ -537,8 +537,6 @@ async def determine_nodejs_binaries( @dataclass(frozen=True) class CorepackToolRequest: tool: str - input_digest: Digest - working_directory: str | None = None version: str | None = None @@ -552,20 +550,16 @@ async def prepare_corepack_tool( request: CorepackToolRequest, environment: NodeJSProcessEnvironment, nodejs: NodeJS ) -> CorepackToolDigest: version = request.version or nodejs.package_managers.get(request.tool) - if not version and request.input_digest == EMPTY_DIGEST: - raise ValueError(f"Could not determine tool version for {request.tool}.") tool_spec = f"{request.tool}@{version}" if version else request.tool tool_description = tool_spec if version else f"default {tool_spec} version" result = await Get( ProcessResult, Process( argv=filter( - None, ("corepack", "prepare", tool_spec if version else None, "--activate") + None, ("corepack", "prepare", tool_spec if version else "--all", "--activate") ), description=f"Preparing configured {tool_description}.", - input_digest=request.input_digest, immutable_input_digests=environment.immutable_digest(), - working_directory=request.working_directory, level=LogLevel.DEBUG, env=environment.to_env_dict(), append_only_caches={**environment.append_only_caches}, @@ -583,12 +577,7 @@ async def setup_node_tool_process( tool_name = request.tool.replace("npx", "npm") corepack_tool = await Get( CorepackToolDigest, - CorepackToolRequest( - tool_name, - request.project_digest or EMPTY_DIGEST, - request.working_directory, - request.tool_version, - ), + CorepackToolRequest(tool_name, request.tool_version), ) input_digest = await Get(Digest, MergeDigests([request.input_digest, corepack_tool.digest])) else: diff --git a/src/python/pants/backend/javascript/subsystems/nodejs_test.py b/src/python/pants/backend/javascript/subsystems/nodejs_test.py index ddf69eb429c..c6e81214a8f 100644 --- a/src/python/pants/backend/javascript/subsystems/nodejs_test.py +++ b/src/python/pants/backend/javascript/subsystems/nodejs_test.py @@ -14,6 +14,8 @@ from pants.backend.javascript.subsystems import nodejs from pants.backend.javascript.subsystems.nodejs import ( + CorepackToolDigest, + CorepackToolRequest, NodeJS, NodeJSBinaries, NodeJSProcessEnvironment, @@ -35,7 +37,7 @@ ) from pants.core.util_rules.system_binaries import BinaryNotFoundError, BinaryPath, BinaryShims from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest -from pants.engine.internals.native_engine import EMPTY_DIGEST +from pants.engine.internals.native_engine import EMPTY_DIGEST, Digest, Snapshot from pants.engine.platform import Platform from pants.engine.process import ProcessResult from pants.testutil.rule_runner import MockGet, QueryRule, RuleRunner, run_rule_with_mocks @@ -53,6 +55,7 @@ def rule_runner() -> RuleRunner: QueryRule(ProcessResult, [nodejs.NodeJSToolProcess]), QueryRule(NodeJSBinaries, ()), QueryRule(VersionManagerSearchPaths, (VersionManagerSearchPathsRequest,)), + QueryRule(CorepackToolDigest, (CorepackToolRequest,)), ], target_types=[JSSourcesGeneratorTarget], ) @@ -60,6 +63,35 @@ def rule_runner() -> RuleRunner: return rule_runner +def get_snapshot(rule_runner: RuleRunner, digest: Digest) -> Snapshot: + return rule_runner.request(Snapshot, [digest]) + + 
+@pytest.mark.parametrize("package_manager", ["npm", "pnpm", "yarn"]) +def test_corepack_without_explicit_version_contains_installation( + rule_runner: RuleRunner, package_manager: str +): + result = rule_runner.request( + CorepackToolDigest, [CorepackToolRequest(package_manager, version=None)] + ) + + snapshot = get_snapshot(rule_runner, result.digest) + + assert f"._corepack_home/{package_manager}" in snapshot.dirs + + +@pytest.mark.parametrize("package_manager", ["npm@7.0.0", "pnpm@2.0.0", "yarn@1.0.0"]) +def test_corepack_with_explicit_version_contains_requested_installation( + rule_runner: RuleRunner, package_manager: str +): + binary, version = package_manager.split("@") + + result = rule_runner.request(CorepackToolDigest, [CorepackToolRequest(binary, version)]) + snapshot = get_snapshot(rule_runner, result.digest) + + assert f"._corepack_home/{binary}/{version}" in snapshot.dirs + + def test_npm_process(rule_runner: RuleRunner): rule_runner.set_options(["--nodejs-package-managers={'npm': '8.5.5'}"], env_inherit={"PATH"}) result = rule_runner.request( diff --git a/src/python/pants/backend/javascript/subsystems/nodejs_tool_test.py b/src/python/pants/backend/javascript/subsystems/nodejs_tool_test.py index 6131c9100ff..05a69023378 100644 --- a/src/python/pants/backend/javascript/subsystems/nodejs_tool_test.py +++ b/src/python/pants/backend/javascript/subsystems/nodejs_tool_test.py @@ -3,6 +3,7 @@ from __future__ import annotations +import dataclasses import json from pathlib import Path @@ -12,7 +13,7 @@ from pants.backend.javascript.subsystems import nodejs_tool from pants.backend.javascript.subsystems.nodejs_tool import NodeJSToolBase, NodeJSToolRequest from pants.engine.internals.native_engine import EMPTY_DIGEST -from pants.engine.process import ProcessResult +from pants.engine.process import Process, ProcessResult from pants.testutil.rule_runner import QueryRule, RuleRunner from pants.util.logging import LogLevel @@ -33,6 +34,8 @@ def rule_runner() -> RuleRunner: *CowsayTool.rules(), QueryRule(CowsayTool, []), QueryRule(ProcessResult, [NodeJSToolRequest]), + QueryRule(Process, [NodeJSToolRequest]), + QueryRule(ProcessResult, [Process]), ], target_types=[PackageJsonTarget], ) @@ -45,8 +48,19 @@ def test_version_option_overrides_default(rule_runner: RuleRunner): assert tool.version == "cowsay@1.5.0" -@pytest.mark.parametrize("package_manager", ["yarn", "npm", "pnpm"]) -def test_execute_process_with_package_manager(rule_runner: RuleRunner, package_manager: str): +@pytest.mark.parametrize( + "package_manager, expected_argv", + [ + pytest.param("yarn", ("yarn", "dlx", "--quiet"), id="yarn"), + pytest.param("npm", ("npm", "exec", "--yes", "--"), id="npm"), + pytest.param("pnpm", ("pnpm", "dlx"), id="pnpm"), + ], +) +def test_execute_process_with_package_manager( + rule_runner: RuleRunner, + package_manager: str, + expected_argv: tuple[str, ...], +): rule_runner.set_options( [ "--cowsay-version=cowsay@1.5.0", @@ -56,14 +70,91 @@ def test_execute_process_with_package_manager(rule_runner: RuleRunner, package_m env_inherit={"PATH"}, ) tool = rule_runner.request(CowsayTool, []) - result = rule_runner.request( - ProcessResult, - [tool.request(("--version",), EMPTY_DIGEST, "Cowsay version", LogLevel.DEBUG)], - ) + + request = tool.request(("--version",), EMPTY_DIGEST, "Cowsay version", LogLevel.DEBUG) + + to_run = rule_runner.request(Process, [request]) + + assert to_run.argv == expected_argv + ("cowsay@1.5.0", "--version") + + result = rule_runner.request(ProcessResult, [request]) assert 
result.stdout == b"1.5.0\n" +@pytest.mark.parametrize( + "package_manager, version", + [ + pytest.param("yarn", "1.22.15", id="yarn"), + pytest.param("npm", "7.20.1", id="npm"), + pytest.param("pnpm", "6.32.12", id="pnpm"), + ], +) +def test_execute_process_with_package_manager_version_from_configuration( + rule_runner: RuleRunner, + package_manager: str, + version: str, +): + rule_runner.set_options( + [ + f"--nodejs-package-manager={package_manager}", + f"--nodejs-package-managers={{'{package_manager}': '{version}'}}", + ], + env_inherit={"PATH"}, + ) + + tool = rule_runner.request(CowsayTool, []) + + result = request_package_manager_version_for_tool(tool, package_manager, rule_runner) + + assert result == version + + +@pytest.mark.parametrize( + "lockfile_path, package_manager, version", + [ + pytest.param(Path(__file__).parent / "yarn.lock", "yarn", "1.22.15", id="yarn_resolve"), + pytest.param( + Path(__file__).parent / "pnpm-lock.yaml", "pnpm", "6.32.12", id="pnpm_resolve" + ), + pytest.param( + Path(__file__).parent / "package-lock.json", "npm", "7.20.1", id="npm_resolve" + ), + ], +) +def test_execute_process_with_package_manager_version_from_resolve_package_manager( + rule_runner: RuleRunner, + lockfile_path: Path, + package_manager: str, + version: str, +): + rule_runner.set_options( + [ + "--nodejs-package-managers={}", + "--cowsay-install-from-resolve=nodejs-default", + ], + env_inherit={"PATH"}, + ) + rule_runner.write_files( + { + "BUILD": "package_json(name='root_pkg')", + "package.json": json.dumps( + { + "name": "@the-company/project", + "devDependencies": {"cowsay": "1.5.0"}, + "packageManager": f"{package_manager}@{version}", + } + ), + lockfile_path.name: lockfile_path.read_text(), + } + ) + tool = rule_runner.request(CowsayTool, []) + + result = request_package_manager_version_for_tool(tool, package_manager, rule_runner) + + assert result == version + + @pytest.mark.parametrize( "lockfile_path, package_manager", [ @@ -98,3 +189,15 @@ def test_resolve_dictates_version( ) assert result.stdout == b"1.5.0\n" + + +def request_package_manager_version_for_tool( + tool: NodeJSToolBase, package_manager: str, rule_runner: RuleRunner +) -> str: + request = tool.request((), EMPTY_DIGEST, "Inspect package manager version", LogLevel.DEBUG) + process = rule_runner.request(Process, [request]) + result = rule_runner.request( + ProcessResult, + [dataclasses.replace(process, argv=(package_manager, "--version"))], + ) + return result.stdout.decode().strip() From cfb3cafcac2386c909bc4a3fee3997e1bd8b03d7 Mon Sep 17 00:00:00 2001 From: Tom Solberg Date: Sun, 23 Jun 2024 22:49:38 +0200 Subject: [PATCH 06/10] Prepare 2.23.0.dev2 (#21101) --- src/python/pants/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/pants/VERSION b/src/python/pants/VERSION index 000fadea863..d57c9a5b2b5 100644 --- a/src/python/pants/VERSION +++ b/src/python/pants/VERSION @@ -1 +1 @@ -2.23.0.dev1 +2.23.0.dev2 From 777c2822c3eb5ba1b94b6cd6589870fd25331907 Mon Sep 17 00:00:00 2001 From: Tom Dyas Date: Tue, 25 Jun 2024 00:32:27 -0400 Subject: [PATCH 07/10] shell/adhoc: use metadata hashing instead of content hashing for `workspace_invalidation_sources` (#21092) Switch to using metadata-based invalidation instead of content-based invalidation for any sources referenced by the `workspace_invalidation_sources` field. 
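In plain-Python terms, the switch amounts to roughly the following sketch (illustrative only; the real helper added below, `compute_workspace_invalidation_hash`, goes through the engine's `PathMetadataRequest` API from an earlier commit rather than `os.stat`):

```python
import hashlib
import json
import os


def metadata_fingerprint(paths: list[str]) -> str:
    # Key the process on stable path attributes (length, mtime, ...) rather
    # than on file contents, so the fingerprint tracks what is actually on
    # disk at execution time instead of a content digest computed earlier.
    h = hashlib.sha256()
    for path in sorted(paths):
        st = os.stat(path)
        h.update(
            json.dumps(
                {"path": path, "length": st.st_size, "mtime": st.st_mtime},
                sort_keys=True,
            ).encode()
        )
    return h.hexdigest()
```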
Metadata-based invalidation is necessary because `workspace_invalidation_sources` is intended to be used with the `experimental_workspace_environment` target, and content-based invalidation has a cache-poisoning problem in that scenario: Pants computes a content digest, but the user may overwrite the digested sources before Pants has executed the applicable `adhoc_tool` / `shell_command` process. The cache will then hold a result stored under the digest of the original file version even though the file content changed. See https://github.com/pantsbuild/pants/pull/21051#issuecomment-2174995973 for expanded discussion. --- .../core/util_rules/adhoc_process_support.py | 64 +++++++++++-- .../util_rules/adhoc_process_support_test.py | 95 +++++++++++++++++++ .../pants/engine/internals/native_engine.pyi | 13 +++ src/rust/engine/src/externs/fs.rs | 46 ++++++++- 4 files changed, 209 insertions(+), 9 deletions(-) create mode 100644 src/python/pants/core/util_rules/adhoc_process_support_test.py diff --git a/src/python/pants/core/util_rules/adhoc_process_support.py b/src/python/pants/core/util_rules/adhoc_process_support.py index 5ba39a20536..46a13f6846e 100644 --- a/src/python/pants/core/util_rules/adhoc_process_support.py +++ b/src/python/pants/core/util_rules/adhoc_process_support.py @@ -3,10 +3,13 @@ from __future__ import annotations import dataclasses +import hashlib +import json import logging import os import shlex from dataclasses import dataclass +from datetime import datetime from textwrap import dedent # noqa: PNT20 from typing import Iterable, Mapping, TypeVar, Union @@ -28,9 +31,12 @@ FileContent, MergeDigests, PathGlobs, + PathMetadataRequest, + PathMetadataResult, + Paths, Snapshot, ) -from pants.engine.internals.native_engine import AddressInput, RemovePrefix +from pants.engine.internals.native_engine import AddressInput, PathMetadata, RemovePrefix from pants.engine.process import ( FallibleProcessResult, Process, @@ -542,6 +548,53 @@ async def run_adhoc_process( return AdhocProcessResult(result, adjusted) + +# Compute a stable bytes value for a `PathMetadata` consisting of the values to be hashed. +# Access time is not included to avoid having mere access to a file invalidating an execution. +def _path_metadata_to_bytes(m: PathMetadata | None) -> bytes: + if m is None: + return b"" + + def dt_fmt(dt: datetime | None) -> str | None: + if dt is not None: + return dt.isoformat() + return None + + d = { + "path": m.path, + "kind": str(m.kind), + "length": m.length, + "is_executable": m.is_executable, + "unix_mode": m.unix_mode, + "created": dt_fmt(m.created), + "modified": dt_fmt(m.modified), + "symlink_target": m.symlink_target, + } + + return json.dumps(d, sort_keys=True).encode() + + +async def compute_workspace_invalidation_hash(path_globs: PathGlobs) -> str: + raw_paths = await Get(Paths, PathGlobs, path_globs) + paths = sorted([*raw_paths.files, *raw_paths.dirs]) + metadata_results = await MultiGet( + Get(PathMetadataResult, PathMetadataRequest(path)) for path in paths + ) + + # Compute a stable hash of all of the metadata values since the hash value should be stable + # when used outside the process (for example, in the cache). (The `__hash__` dunder method + # computes an unstable hash which can and does vary across different process invocations.) + # + # While it could be more of an intellectual correctness point than a necessity, it does matter + # for a single user to see the same behavior across process invocations if pantsd restarts.
+ # + # Note: This could probably use a non-cryptographic hash (e.g., Murmur), but that would require + # a third-party dependency. + h = hashlib.sha256() + for mr in metadata_results: + h.update(_path_metadata_to_bytes(mr.metadata)) + return h.hexdigest() + + @rule async def prepare_adhoc_process( request: AdhocProcessRequest, @@ -569,14 +622,13 @@ async def prepare_adhoc_process( if supplied_env_vars: command_env.update(supplied_env_vars) - # Compute the digest for any workspace invalidation sources and put the digest into the environment as a dummy variable + # Compute the hash for any workspace invalidation sources and put the hash into the environment as a dummy variable # so that the process produced by this rule will be invalidated if any of the referenced files change. if request.workspace_invalidation_globs is not None: - workspace_invalidation_digest = await Get( - Digest, PathGlobs, request.workspace_invalidation_globs + workspace_invalidation_hash = await compute_workspace_invalidation_hash( + request.workspace_invalidation_globs ) - digest_str = f"{workspace_invalidation_digest.fingerprint}-{workspace_invalidation_digest.serialized_bytes_length}" - command_env["__PANTS_WORKSPACE_INVALIDATION_SOURCES_DIGEST"] = digest_str + command_env["__PANTS_WORKSPACE_INVALIDATION_SOURCES_HASH"] = workspace_invalidation_hash input_snapshot = await Get(Snapshot, Digest, request.input_digest) diff --git a/src/python/pants/core/util_rules/adhoc_process_support_test.py b/src/python/pants/core/util_rules/adhoc_process_support_test.py new file mode 100644 index 00000000000..a96d706f843 --- /dev/null +++ b/src/python/pants/core/util_rules/adhoc_process_support_test.py @@ -0,0 +1,95 @@ +# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). +from __future__ import annotations + +from datetime import datetime, timedelta, timezone + +from pants.core.util_rules.adhoc_process_support import _path_metadata_to_bytes +from pants.engine.internals.native_engine import PathMetadata, PathMetadataKind + + +def test_path_metadata_to_bytes() -> None: + now = datetime.now(timezone.utc) + + m_file = PathMetadata( + path="foo", + kind=PathMetadataKind.FILE, + length=5, + is_executable=False, + unix_mode=0o666, + accessed=now, + created=now, + modified=now, + symlink_target=None, + ) + b_file = _path_metadata_to_bytes(m_file) + assert len(b_file) > 0 + + m_dir = PathMetadata( + path="sub-dir", + kind=PathMetadataKind.DIRECTORY, + length=5, + is_executable=False, + unix_mode=0o777, + accessed=now, + created=now, + modified=now, + symlink_target=None, + ) + assert m_dir is not None + b_dir = _path_metadata_to_bytes(m_dir) + assert len(b_dir) > 0 + + m_symlink = PathMetadata( + path="bar", + kind=PathMetadataKind.SYMLINK, + length=5, + is_executable=False, + unix_mode=0o555, + accessed=now, + created=now, + modified=now, + symlink_target="foo", + ) + assert m_symlink is not None + b_symlink = _path_metadata_to_bytes(m_symlink) + assert len(b_symlink) > 0 + + b_missing = _path_metadata_to_bytes(None) + assert len(b_missing) == 0 + + # Update the access time only and see if conversion remains the same.
+ atime = m_file.accessed + assert atime is not None + m1 = PathMetadata( + path=m_file.path, + kind=m_file.kind, + length=m_file.length, + is_executable=m_file.is_executable, + unix_mode=m_file.unix_mode, + accessed=atime + timedelta(seconds=1), + created=m_file.created, + modified=m_file.modified, + symlink_target=m_file.symlink_target, + ) + b1 = _path_metadata_to_bytes(m1) + assert len(b1) > 0 + assert b_file == b1 + + # Update the modified time; the conversion should differ. + mtime = m1.modified + assert mtime is not None + m2 = PathMetadata( + path=m1.path, + kind=m1.kind, + length=m1.length, + is_executable=m1.is_executable, + unix_mode=m1.unix_mode, + accessed=m1.accessed, + created=m1.created, + modified=mtime + timedelta(seconds=1), + symlink_target=m1.symlink_target, + ) + b2 = _path_metadata_to_bytes(m2) + assert len(b2) > 0 + assert b1 != b2 diff --git a/src/python/pants/engine/internals/native_engine.pyi b/src/python/pants/engine/internals/native_engine.pyi index 8ac01f10ff5..279d4249b31 100644 --- a/src/python/pants/engine/internals/native_engine.pyi +++ b/src/python/pants/engine/internals/native_engine.pyi @@ -501,6 +501,18 @@ class PathMetadataKind: SYMLINK: PathMetadataKind = ... class PathMetadata: + def __new__( + cls, + path: str, + kind: PathMetadataKind, + length: int, + is_executable: bool, + unix_mode: int | None, + accessed: datetime | None, + created: datetime | None, + modified: datetime | None, + symlink_target: str | None, + ) -> PathMetadata: ... @property def path(self) -> str: ... @property @@ -519,6 +531,7 @@ class PathMetadata: def modified(self) -> datetime | None: ... @property def symlink_target(self) -> str | None: ... + def copy(self) -> PathMetadata: ... # ------------------------------------------------------------------------------ # Intrinsics diff --git a/src/rust/engine/src/externs/fs.rs b/src/rust/engine/src/externs/fs.rs index 5efaa1738e3..c95f96e8c93 100644 --- a/src/rust/engine/src/externs/fs.rs +++ b/src/rust/engine/src/externs/fs.rs @@ -14,8 +14,8 @@ use pyo3::prelude::*; use pyo3::types::{PyIterator, PyString, PyTuple, PyType}; use fs::{ - DirectoryDigest, FilespecMatcher, GlobExpansionConjunction, PathGlobs, StrictGlobMatching, - EMPTY_DIRECTORY_DIGEST, + DirectoryDigest, FilespecMatcher, GlobExpansionConjunction, PathGlobs, PathMetadata, + StrictGlobMatching, EMPTY_DIRECTORY_DIGEST, }; use hashing::{Digest, Fingerprint, EMPTY_DIGEST}; use store::Snapshot; @@ -495,6 +495,16 @@ impl From<fs::PathMetadataKind> for PyPathMetadataKind { } } +impl From<PyPathMetadataKind> for fs::PathMetadataKind { + fn from(value: PyPathMetadataKind) -> Self { + match value { + PyPathMetadataKind::File => fs::PathMetadataKind::File, + PyPathMetadataKind::Directory => fs::PathMetadataKind::Directory, + PyPathMetadataKind::Symlink => fs::PathMetadataKind::Symlink, + } + } +} + /// Expanded version of `Stat` when access to additional filesystem attributes is necessary.
#[pyclass(name = "PathMetadata")] #[derive(Clone, Debug, Eq, PartialEq)] @@ -502,6 +512,32 @@ pub struct PyPathMetadata(pub fs::PathMetadata); #[pymethods] impl PyPathMetadata { + #[new] + pub fn new( + path: PathBuf, + kind: PyPathMetadataKind, + length: u64, + is_executable: bool, + unix_mode: Option<u32>, + accessed: Option<SystemTime>, + created: Option<SystemTime>, + modified: Option<SystemTime>, + symlink_target: Option<PathBuf>, + ) -> Self { + let this = PathMetadata { + path, + kind: kind.into(), + length, + is_executable, + unix_mode, + accessed, + created, + modified, + symlink_target, + }; + PyPathMetadata(this) + } + #[getter] + pub fn path(&self) -> PyResult<PathBuf> { + Ok(self.0.path.clone()) @@ -509,7 +545,7 @@ impl PyPathMetadata { #[getter] pub fn kind(&self) -> PyResult<PyPathMetadataKind> { - Ok(PyPathMetadataKind::from(self.0.kind)) + Ok(self.0.kind.into()) } #[getter] @@ -547,6 +583,10 @@ impl PyPathMetadata { Ok(self.0.symlink_target.clone()) } + pub fn copy(&self) -> PyResult<Self> { + Ok(self.clone()) + } + fn __repr__(&self) -> String { format!("{:?}", self.0) } From 38e603ea05d2ef5db372fc85f7d1d929dd68a4c0 Mon Sep 17 00:00:00 2001 From: Tom Dyas Date: Tue, 25 Jun 2024 01:30:55 -0400 Subject: [PATCH 08/10] upgrade Rust to v1.79.0 (#21105) Upgrade Rust to v1.79.0. [Release Notes](https://blog.rust-lang.org/2024/06/13/Rust-1.79.0.html) Fixed one clippy lint issue and deprecation of `default_features` in `Cargo.toml` syntax (it is now called `default-features`). --- .github/workflows/release.yaml | 6 +++--- .github/workflows/test.yaml | 10 +++++----- src/rust/engine/Cargo.toml | 2 +- src/rust/engine/rule_graph/src/builder.rs | 2 +- src/rust/engine/rust-toolchain | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 09b9fd5ddc2..5caba39051b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -192,7 +192,7 @@ jobs: with: key: macOS10-15-x86_64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes @@ -276,7 +276,7 @@ jobs: with: key: macOS11-ARM64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes @@ -367,7 +367,7 @@ jobs: uses: actions/cache@v3 with: key: Linux-x86_64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1adf13911d4..6b16b5184bf 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -38,7 +38,7 @@ jobs: uses: actions/cache@v3 with: key: Linux-ARM64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes @@ -130,7 +130,7 @@ jobs: uses: actions/cache@v3 with: key: Linux-x86_64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes @@ -232,7 +232,7 @@ jobs: uses: actions/cache@v3 with: key: macOS12-x86_64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes @@ -441,7 +441,7 @@ jobs: uses: actions/cache@v3 with: key: macOS10-15-x86_64-rustup-${{
hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes @@ -508,7 +508,7 @@ jobs: uses: actions/cache@v3 with: key: macOS11-ARM64-rustup-${{ hashFiles('src/rust/engine/rust-toolchain') }}-v2 - path: '~/.rustup/toolchains/1.78.0-* + path: '~/.rustup/toolchains/1.79.0-* ~/.rustup/update-hashes diff --git a/src/rust/engine/Cargo.toml b/src/rust/engine/Cargo.toml index 9f11d11b98d..46a836214ca 100644 --- a/src/rust/engine/Cargo.toml +++ b/src/rust/engine/Cargo.toml @@ -72,7 +72,7 @@ process_execution = { path = "process_execution" } pyo3 = { workspace = true } rand = { workspace = true } regex = { workspace = true } -reqwest = { version = "0.11", default_features = false, features = ["stream", "rustls-tls"] } +reqwest = { version = "0.11", default-features = false, features = ["stream", "rustls-tls"] } rule_graph = { path = "rule_graph" } smallvec = { version = "1", features = ["union"] } stdio = { path = "stdio" } diff --git a/src/rust/engine/rule_graph/src/builder.rs b/src/rust/engine/rule_graph/src/builder.rs index fcd2c3fe511..af29146bdf4 100644 --- a/src/rust/engine/rule_graph/src/builder.rs +++ b/src/rust/engine/rule_graph/src/builder.rs @@ -1138,7 +1138,7 @@ impl Builder { // affect our identity) rather than dependents. #[allow(clippy::comparison_chain)] let chosen_edges = { - let mut minimum_param_set_size = ::std::usize::MAX; + let mut minimum_param_set_size = usize::MAX; let mut chosen_edges = Vec::new(); for edge_ref in relevant_edge_refs { let param_set_size = graph[edge_ref.target()].0.in_set.len(); diff --git a/src/rust/engine/rust-toolchain b/src/rust/engine/rust-toolchain index d108c20bcd3..171d823894f 100644 --- a/src/rust/engine/rust-toolchain +++ b/src/rust/engine/rust-toolchain @@ -1,5 +1,5 @@ [toolchain] -channel = "1.78.0" +channel = "1.79.0" # NB: We don't list the components (namely `clippy` and `rustfmt`) and instead rely on either the # the profile being "default" (by-and-large the default profile) or the nice error message from # `cargo fmt` and `cargo clippy` if the required component isn't installed From dae72bfe2e8a1e49906d8a11c1efa0f036cafecd Mon Sep 17 00:00:00 2001 From: Alexey Tereshenkov <50622389+AlexTereshenkov@users.noreply.github.com> Date: Tue, 25 Jun 2024 07:37:36 +0100 Subject: [PATCH 09/10] docs: how to publish a Pants config schema file in Schema Store (#21095) As I publish Pants config schemas to the SchemaStore every release, it is probably helpful to document the steps briefly. --- .../maintenance-tasks-and-scripts.mdx | 8 ------- .../releases/release-process.mdx | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/docs/contributions/development/maintenance-tasks-and-scripts.mdx b/docs/docs/contributions/development/maintenance-tasks-and-scripts.mdx index 7e5fbbb9669..bfc49d5f6e0 100644 --- a/docs/docs/contributions/development/maintenance-tasks-and-scripts.mdx +++ b/docs/docs/contributions/development/maintenance-tasks-and-scripts.mdx @@ -62,14 +62,6 @@ Some tools use `NodeJSToolBase` to install executable npm packages. To update th Example: [#21007](https://github.com/pantsbuild/pants/pull/21007). -## Generate a new JSON schema file - -Some editors can use JSON Schema for better completions (etc.) when editing TOML files like `pants.toml`. To generate such a schema: - -1. Run `pants help-all > all-help.json` -2. Run `pants run build-support/bin/generate_json_schema.py -- --all-help-file=all-help.json` -3. 
For a new release, upload the resulting file to https://www.schemastore.org/json/ ## Update or create FaaS complete platforms files The function-as-a-service (FaaS) subsystems provide some built-in PEX complete platforms JSON files, for specific runtimes. To update or create these: diff --git a/docs/docs/contributions/releases/release-process.mdx b/docs/docs/contributions/releases/release-process.mdx index 5770b16e952..366f789ed1b 100644 --- a/docs/docs/contributions/releases/release-process.mdx +++ b/docs/docs/contributions/releases/release-process.mdx @@ -158,6 +158,30 @@ Once the workflow finishes, look through any failures and determine if there's a Alternatively, after starting the workflow, post the link to the in-progress run in `#development` in Slack, so that someone can come back to it when it does finish. +## Step 5: Publish a schema in JSON Schema Store + +Some editors can use JSON Schema for better completions when editing TOML files like `pants.toml`. +The Pants configuration file schema is published at https://www.schemastore.org/. For every stable `2.x.0` release, +a new schema needs to be generated and uploaded by submitting a PR against https://github.com/SchemaStore/schemastore. +This is an example pull request for reference: https://github.com/SchemaStore/schemastore/pull/3880. + +To produce the `pantsbuild-<version>.json` schema file, run: + +```bash +pants help-all > all-help.json +pants run build-support/bin/generate_json_schema.py -- --all-help-file=all-help.json +``` + +It may be helpful to compare the last schema file with the newly produced one to make sure there are no discrepancies +(e.g., that the config values have sensible types and the help strings are rendered adequately). You can download the +schemas of previous releases from the store website; the JSON files are available at +`https://json.schemastore.org/pantsbuild-<version>.json`. + +Watch out for any configuration parameters that rely on your local environment, as certain default config values +will be expanded using the local runtime environment, which is undesirable in a published schema. The script handles those known config values +by keeping a list of them; however, it may need to be extended as more options with environment-specific default +values are added. ## When Things Go Wrong From time to time, a release will fail. It's a complex process.
The first thing to do after you've From ecf0a7f30800db1bcaf17079f3595a2ae8587964 Mon Sep 17 00:00:00 2001 From: SJ Date: Tue, 25 Jun 2024 08:01:17 -0400 Subject: [PATCH 10/10] [Call-by-name] Replaced `ast` with `libcst` in migration code (#21089) - Gets with comments within parameter/args are now permitted - Having multiple Gets on the same line in a MultiGet no longer wipes out the latter Gets - Started on smarter `implicitly` usage - for example, functions that take no arguments no longer have an `implicitly` attached --- 3rdparty/python/requirements.txt | 1 + 3rdparty/python/user_reqs.lock | 106 ++- src/python/pants/goal/migrate_call_by_name.py | 673 +++++++----------- .../migrate_call_by_name_integration_test.py | 100 ++- .../pants/goal/migrate_call_by_name_test.py | 111 +-- src/python/pants/util/astutil.py | 25 - src/python/pants/util/astutil_test.py | 33 - src/python/pants/util/cstutil.py | 54 ++ src/python/pants/util/cstutil_test.py | 44 ++ 9 files changed, 558 insertions(+), 589 deletions(-) delete mode 100644 src/python/pants/util/astutil.py delete mode 100644 src/python/pants/util/astutil_test.py create mode 100644 src/python/pants/util/cstutil.py create mode 100644 src/python/pants/util/cstutil_test.py diff --git a/3rdparty/python/requirements.txt b/3rdparty/python/requirements.txt index 398db669dcf..1d9ab97d574 100644 --- a/3rdparty/python/requirements.txt +++ b/3rdparty/python/requirements.txt @@ -9,6 +9,7 @@ chevron==0.14.0 fasteners==0.16.3 freezegun==1.2.1 ijson==3.2.3 +libcst==1.4.0 packaging==21.3 pex==2.3.3 psutil==5.9.8 diff --git a/3rdparty/python/user_reqs.lock b/3rdparty/python/user_reqs.lock index df07bed39d1..ad60bc3f576 100644 --- a/3rdparty/python/user_reqs.lock +++ b/3rdparty/python/user_reqs.lock @@ -19,6 +19,7 @@ // "fasteners==0.16.3", // "freezegun==1.2.1", // "ijson==3.2.3", +// "libcst==1.4.0", // "mypy-typing-asserts==0.1.1", // "node-semver==0.9.0", // "packaging==21.3", @@ -939,6 +940,66 @@ "requires_python": ">=3.7", "version": "2.0.0" }, + { + "artifacts": [ + { + "algorithm": "sha256", + "hash": "d024f44059a853b4b852cfc04fec33e346659d851371e46fc8e7c19de24d3da9", + "url": "https://files.pythonhosted.org/packages/71/da/16307f14b47f761235050076e1d2954fc7de9346f1410ba8c67a54a9f40e/libcst-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" + }, + { + "algorithm": "sha256", + "hash": "b8ecdba8934632b4dadacb666cd3816627a6ead831b806336972ccc4ba7ca0e9", + "url": "https://files.pythonhosted.org/packages/7b/b1/8476fe4fa1061062855459d519ffe2115a891638c230ee3465c69fdbfd7a/libcst-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "8e54c777b8d27339b70f304d16fc8bc8674ef1bd34ed05ea874bf4921eb5a313", + "url": "https://files.pythonhosted.org/packages/7e/0d/89516795ff2a11be10c060c539895b3781793d46cb7c9b0b7b3c4fa3fbc1/libcst-1.4.0-cp39-cp39-macosx_11_0_arm64.whl" + }, + { + "algorithm": "sha256", + "hash": "bb0abf627ee14903d05d0ad9b2c6865f1b21eb4081e2c7bea1033f85db2b8bae", + "url": "https://files.pythonhosted.org/packages/95/cf/a2be91d53e4068d4def8b5cc475f20e1c1a7d32c85634ee7d6b3ea2e3c9b/libcst-1.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" + }, + { + "algorithm": "sha256", + "hash": "061d6855ef30efe38b8a292b7e5d57c8e820e71fc9ec9846678b60a934b53bbb", + "url": "https://files.pythonhosted.org/packages/c0/c8/15ca337e5f5604aabed899609ba08abbc0e7815ffdfca37802da52d4d0bf/libcst-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl" + }, + { + "algorithm": "sha256", + "hash": 
"449e0b16604f054fa7f27c3ffe86ea7ef6c409836fe68fe4e752a1894175db00", + "url": "https://files.pythonhosted.org/packages/e4/bd/ff41d7a8efc4f60a61d903c3f9823565006f44f2b8b11c99701f552b0851/libcst-1.4.0.tar.gz" + } + ], + "project_name": "libcst", + "requires_dists": [ + "Sphinx>=5.1.1; extra == \"dev\"", + "black==23.12.1; extra == \"dev\"", + "build>=0.10.0; extra == \"dev\"", + "coverage>=4.5.4; extra == \"dev\"", + "fixit==2.1.0; extra == \"dev\"", + "flake8==7.0.0; extra == \"dev\"", + "hypothesis>=4.36.0; extra == \"dev\"", + "hypothesmith>=0.0.4; extra == \"dev\"", + "jinja2==3.1.4; extra == \"dev\"", + "jupyter>=1.0.0; extra == \"dev\"", + "maturin<1.6,>=0.8.3; extra == \"dev\"", + "nbsphinx>=0.4.2; extra == \"dev\"", + "prompt-toolkit>=2.0.9; extra == \"dev\"", + "pyre-check==0.9.18; platform_system != \"Windows\" and extra == \"dev\"", + "pyyaml>=5.2", + "setuptools-rust>=1.5.2; extra == \"dev\"", + "setuptools-scm>=6.0.1; extra == \"dev\"", + "slotscheck>=0.7.1; extra == \"dev\"", + "sphinx-rtd-theme>=0.4.3; extra == \"dev\"", + "ufmt==2.6.0; extra == \"dev\"", + "usort==1.0.8.post1; extra == \"dev\"" + ], + "requires_python": ">=3.9", + "version": "1.4.0" + }, { "artifacts": [ { @@ -1119,43 +1180,43 @@ "artifacts": [ { "algorithm": "sha256", - "hash": "28e552a060ba2740d0d2aabe35162652c1459a0b9069fe0db7f4ee0e18e74d58", - "url": "https://files.pythonhosted.org/packages/27/17/a9872f20809e37ad03c523994ef3e0b7420c6508fe4553b7c0d8476ee03a/pydantic-1.10.15-py3-none-any.whl" + "hash": "aa2774ba5412fd1c5cb890d08e8b0a3bb5765898913ba1f61a65a4810f03cf29", + "url": "https://files.pythonhosted.org/packages/b6/85/f23404c7c82ddaa60e83ee0b1ce9c1692a04ed2861c7306ba5d06410608d/pydantic-1.10.16-py3-none-any.whl" }, { "algorithm": "sha256", - "hash": "92229f73400b80c13afcd050687f4d7e88de9234d74b27e6728aa689abcf58cc", - "url": "https://files.pythonhosted.org/packages/4f/fa/da8c9468b6c7eb974721af57abbbfd33537b8c29a9a2c0c788a3dc6aea1b/pydantic-1.10.15-cp39-cp39-musllinux_1_1_i686.whl" + "hash": "8bb388f6244809af69ee384900b10b677a69f1980fdc655ea419710cffcb5610", + "url": "https://files.pythonhosted.org/packages/11/af/3fbad4c7f9a1c4410b500cd10ba4a9c4fee38ff341428f17d3e6b01636f9/pydantic-1.10.16.tar.gz" }, { "algorithm": "sha256", - "hash": "67f1a1fb467d3f49e1708a3f632b11c69fccb4e748a325d5a491ddc7b5d22383", - "url": "https://files.pythonhosted.org/packages/53/c8/b417d5016955033a7d8b57fb0ec40666cae9a36ce248b875b8d8f8164feb/pydantic-1.10.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" + "hash": "d8d3c71d14c8bd26d2350c081908dbf59d5a6a8f9596d9ef2b09cc1e61c8662b", + "url": "https://files.pythonhosted.org/packages/1f/18/c83803430a0f4caa9acae30c862899f53750798dbf4de0b95023d1aea037/pydantic-1.10.16-cp39-cp39-macosx_11_0_arm64.whl" }, { "algorithm": "sha256", - "hash": "676ed48f2c5bbad835f1a8ed8a6d44c1cd5a21121116d2ac40bd1cd3619746ed", - "url": "https://files.pythonhosted.org/packages/54/b3/3e806647d3ee420ab4e4e0f9c9e215b87ffd3a654ea893ab32da0726c9e5/pydantic-1.10.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" + "hash": "3895ddb26f22bdddee7e49741486aa7b389258c6f6771943e87fc00eabd79134", + "url": "https://files.pythonhosted.org/packages/2a/6a/13542c4ffa7b30dc8fc3a147970d9d31961060fe062228f967d8584df5ce/pydantic-1.10.16-cp39-cp39-musllinux_1_1_i686.whl" }, { "algorithm": "sha256", - "hash": "a980a77c52723b0dc56640ced396b73a024d4b74f02bcb2d21dbbac1debbe9d0", - "url": 
"https://files.pythonhosted.org/packages/58/3c/0606b7c07a531f27da504d547623a31e05146ea28c011fa316d331dfad8e/pydantic-1.10.15-cp39-cp39-macosx_11_0_arm64.whl" + "hash": "9d91f6866fd3e303c632207813ef6bc4d86055e21c5e5a0a311983a9ac5f0192", + "url": "https://files.pythonhosted.org/packages/58/68/1a0798d394116983a732868592ab6810a369a0165c101200b21b5a451f9b/pydantic-1.10.16-cp39-cp39-macosx_10_9_x86_64.whl" }, { "algorithm": "sha256", - "hash": "ca832e124eda231a60a041da4f013e3ff24949d94a01154b137fc2f2a43c3ffb", - "url": "https://files.pythonhosted.org/packages/6f/3a/1a057845b787469aa8c4ff2c126d5110d616223c82e74561b8ae56b46ec7/pydantic-1.10.15.tar.gz" + "hash": "b73e6386b439b4881d79244e9fc1e32d1e31e8d784673f5d58a000550c94a6c0", + "url": "https://files.pythonhosted.org/packages/a1/96/3cd54412c7f423571735224bd7e81e1d40e027bef022055daf0a4f5c8e71/pydantic-1.10.16-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" }, { "algorithm": "sha256", - "hash": "51d405b42f1b86703555797270e4970a9f9bd7953f3990142e69d1037f9d9e51", - "url": "https://files.pythonhosted.org/packages/ac/93/0dc45c885fb3f1dcccc5e2c498f95007defbdad122998286769384afc7ba/pydantic-1.10.15-cp39-cp39-macosx_10_9_x86_64.whl" + "hash": "5f039881fb2ef86f6de6eacce6e71701b47500355738367413ccc1550b2a69cf", + "url": "https://files.pythonhosted.org/packages/d1/f6/a9bac5c07e14e82f7ee525ad45bb9d0038efab91729f408c940ed023d54d/pydantic-1.10.16-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl" }, { "algorithm": "sha256", - "hash": "2746189100c646682eff0bce95efa7d2e203420d8e1c613dc0c6b4c1d9c1fde4", - "url": "https://files.pythonhosted.org/packages/b0/23/87acb9a7e70a18730aa38f0f3d12e855581411b4c843531c61f384c904c5/pydantic-1.10.15-cp39-cp39-musllinux_1_1_x86_64.whl" + "hash": "55b945da2756b5cef93d792521ad0d457fdf2f69fd5a2d10a27513f5281717dd", + "url": "https://files.pythonhosted.org/packages/fc/e3/cb85dfd9619f382558f7284c7a93bd669f84782b2c0b810d30c683163223/pydantic-1.10.16-cp39-cp39-musllinux_1_1_x86_64.whl" } ], "project_name": "pydantic", @@ -1165,7 +1226,7 @@ "typing-extensions>=4.2.0" ], "requires_python": ">=3.7", - "version": "1.10.15" + "version": "1.10.16" }, { "artifacts": [ @@ -2042,13 +2103,13 @@ "artifacts": [ { "algorithm": "sha256", - "hash": "450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", - "url": "https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.whl" + "hash": "a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "url": "https://files.pythonhosted.org/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl" }, { "algorithm": "sha256", - "hash": "d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19", - "url": "https://files.pythonhosted.org/packages/7a/50/7fd50a27caa0652cd4caf224aa87741ea41d3265ad13f010886167cfcc79/urllib3-2.2.1.tar.gz" + "hash": "dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168", + "url": "https://files.pythonhosted.org/packages/43/6d/fa469ae21497ddc8bc93e5877702dca7cb8f911e337aca7452b5724f1bb6/urllib3-2.2.2.tar.gz" } ], "project_name": "urllib3", @@ -2060,7 +2121,7 @@ "zstandard>=0.18.0; extra == \"zstd\"" ], "requires_python": ">=3.8", - "version": "2.2.1" + "version": "2.2.2" }, { "artifacts": [ @@ -2329,6 +2390,7 @@ "fasteners==0.16.3", "freezegun==1.2.1", "ijson==3.2.3", + "libcst==1.4.0", "mypy-typing-asserts==0.1.1", "node-semver==0.9.0", "packaging==21.3", diff --git 
a/src/python/pants/goal/migrate_call_by_name.py b/src/python/pants/goal/migrate_call_by_name.py index 220b83d1bc7..ed74acba363 100644 --- a/src/python/pants/goal/migrate_call_by_name.py +++ b/src/python/pants/goal/migrate_call_by_name.py @@ -3,16 +3,18 @@ from __future__ import annotations -import ast -import fileinput import importlib.util import json import logging -import tokenize from dataclasses import dataclass from functools import partial from pathlib import Path, PurePath -from typing import Callable, Literal, TypedDict, cast +from typing import Callable, Iterable, TypedDict + +import libcst as cst +import libcst.matchers as m +import libcst.metadata +from libcst.display import dump from pants.base.build_environment import get_buildroot from pants.base.exiter import PANTS_SUCCEEDED_EXIT_CODE, ExitCode @@ -24,7 +26,7 @@ from pants.init.engine_initializer import GraphSession from pants.option.option_types import BoolOption from pants.option.options import Options -from pants.util.astutil import maybe_narrow_type, narrow_type +from pants.util import cstutil from pants.util.strutil import softwrap logger = logging.getLogger(__name__) @@ -98,9 +100,16 @@ def run( syntax_mapper = CallByNameSyntaxMapper(migration_plan) for f in sorted(files_to_migrate): file = Path(f) - if replacements := self._create_replacements_for_file(file, syntax_mapper): - self._perform_replacements_on_file(file, replacements) - self._perform_renaming_on_file(file, {"MultiGet": "concurrently"}) + logger.info(f"Processing {file}") + + transformer = CallByNameTransformer(file, syntax_mapper) + source_code = Path.read_text(file) + source_tree = cst.parse_module(source_code) + new_tree = source_tree.visit(transformer) + + if not new_tree.deep_equals(source_tree): + new_source = new_tree.code + Path.write_text(file, new_source) return PANTS_SUCCEEDED_EXIT_CODE @@ -167,115 +176,6 @@ def _create_migration_plan( return sorted(items, key=lambda x: (x["filepath"], x["function"])) - def _create_replacements_for_file( - self, file: Path, syntax_mapper: CallByNameSyntaxMapper - ) -> list[Replacement]: - """Create a list of replacements using the CallByNameVisitor on a file. - - After the replacements are created, they are sanitized to avoid shadowing names, - and any replacements with comments are filtered out - as those are not safe to replace - """ - visitor = CallByNameVisitor(file, syntax_mapper) - with open(file) as f: - logging.info(f"Processing {file}") - try: - source_code = f.read() - tree = ast.parse(source_code, filename=file, type_comments=True) - visitor.visit(tree) - - for replacement in visitor.replacements: - replacement.sanitize(visitor.names) - - included: list[Replacement] = [] - for r in visitor.replacements: - if r.contains_comments(source_code): - logger.warning( - softwrap( - f""" - Comments found in {file} within replacement range: {r.line_range}. This migration - replacement will not be applied, as it is not safe to replace embedded comments. - - Please review and apply one of the following suggestions: - - Move the comment(s) above or below the replacement range - - Remove the comments from the replacement range, if they are no longer relevant - - If the comments are part of non-`Get` initializer/function args, extract the args to a separate - variable and pass it in - - Manually apply the replacement. 
Run the migration with the `--json` flag to get the migration plan, - and manually import the new function and replace the `Get` call with the new function call - """ - ) - ) - continue - - included.append(r) - - return included - - except SyntaxError as e: - logging.error(f"SyntaxError in {file}: {e}") - except tokenize.TokenError as e: - logging.error(f"TokenError in {file}: {e}") - return [] - - def _perform_replacements_on_file(self, file: Path, replacements: list[Replacement]): - """ - Overwrite replacements for the new source code in a file using `fileinput` - so any "print" calls - will be redirected to the file. This is a destructive operation. - - This function is not idempotent, so it should be run only once per file (per migration plan). - - Replacement imports are bulk added below the existing "pants.engine.rules" import. - """ - - imports_added = False - import_strings: set[str] = set() - for replacement in replacements: - import_strings.update(ast.unparse(i) for i in replacement.sanitized_imports()) - - with fileinput.input(file, inplace=True) as f: - for line in f: - line_number = f.lineno() - - modified = False - for replacement in replacements: - if line_number == replacement.line_range[0]: - line_end = ",\n" if replacement.add_trailing_comma else "\n" - # On the first line of the range, emit the new source code where the old "Get" started - print(line[: replacement.col_range[0]], end="") - print(ast.unparse(replacement.new_source), end=line_end) - modified = True - - elif line_number in range( - replacement.line_range[0], replacement.line_range[1] + 1 - ): - # If there are other lines in the range, just skip them - modified = True - continue - - if not modified: - # For any lines that were not involved with replacements, emit them verbatim to the file - print(line, end="") - - # Add imports below "pants.engine.rules" - # Note: Intentionally not trying to add to the existing "pants.engine.rules" import, as merging and unparsing would wipe out comments (if any) - # Instead, add a new import and let the formatters sort it out - if not imports_added and line.startswith("from pants.engine.rules"): - print("\n".join(sorted(import_strings))) - imports_added = True - - def _perform_renaming_on_file(self, file: Path, rename_map: dict[str, str]): - """In-place renaming for the new source code in a file. - - This should be a separate Visitor/Transformer/Tokenizer which allows intelligent renaming based on AST context (e.g. only in Imports) - TODO: This system is already done in the deprecation fixer using tokenize. 
- """ - - with fileinput.input(file, inplace=True) as f: - for line in f: - for old, new in rename_map.items(): - line = line.replace(old, new) - print(line, end="") - # ------------------------------------------------------------------------------------------ # Migration Plan Typed Dicts @@ -314,57 +214,149 @@ class RuleFunction(TypedDict): class Replacement: filename: PurePath module: str - line_range: tuple[int, int] - col_range: tuple[int, int] - current_source: ast.Call - new_source: ast.Call - additional_imports: list[ast.ImportFrom] - # TODO: Use libcst or another CST, rather than an ast - add_trailing_comma: bool = False - - def sanitized_imports(self) -> list[ast.ImportFrom]: + current_source: cst.Call + new_source: cst.Call + additional_imports: list[cst.ImportFrom] + + def sanitized_imports(self) -> list[cst.ImportFrom]: """Remove any circular or self-imports.""" - return [i for i in self.additional_imports if i.module != self.module] + cst_module = cstutil.make_importfrom_attr(self.module) + return [ + i for i in self.additional_imports if i.module and not cst_module.deep_equals(i.module) + ] - def sanitize(self, names: set[str]): + def sanitize(self, unavailable_names: set[str]): """Remove any shadowing of names, except if the new_func is in the current file.""" - assert isinstance(self.new_source.func, ast.Name) - func_name = self.new_source.func.id - if func_name not in names: + + func_name = cst.ensure_type(self.new_source.func, cst.Name) + if func_name.value not in unavailable_names: return - # If the new function is not in the sanitized imports, it must be in the current file - if not any(i.names[0].name == func_name for i in self.sanitized_imports()): + # If the new func_name is not in the sanitized imports, it must already be in the current file + imported_names: set[str] = set() + for imp in self.sanitized_imports(): + assert isinstance(imp.names, Iterable) + for import_alias in imp.names: + alias_name = cst.ensure_type(import_alias.name, cst.Name) + imported_names.add(alias_name.value) + + if func_name.value not in imported_names: return - bound_name = f"{func_name}_get" - self.new_source.func.id = bound_name - for i in self.additional_imports: - if i.names[0].name == func_name: - i.names[0].asname = bound_name - logging.warning(f"Renamed {func_name} to {bound_name} to avoid shadowing") + # In-place update this replacement and additional imports + bound_name = f"{func_name.value}_get" + self.new_source = self.new_source.with_deep_changes(self.new_source.func, value=bound_name) + + for i, imp in enumerate(self.additional_imports): + assert isinstance(imp.names, Iterable) + for import_alias in imp.names: + alias_name = cst.ensure_type(import_alias.name, cst.Name) + if alias_name.value == func_name.value: + self.additional_imports[i] = imp.with_changes( + names=[cst.ImportAlias(func_name, asname=cst.AsName(cst.Name(bound_name)))] + ) - def contains_comments(self, source_code: str) -> bool: - """Check if there are any comments within the replacement range of the passed-in source - code.""" - lines = source_code.split("\n") - return any("#" in line for line in lines[self.line_range[0] - 1 : self.line_range[1]]) + logging.warning(f"Renamed {func_name} to {bound_name} to avoid shadowing") def __str__(self) -> str: return f""" Replacement( filename={self.filename}, module={self.module}, - line_range={self.line_range}, - col_range={self.col_range}, - current_source={ast.dump(self.current_source, indent=2)}, - new_source={ast.dump(self.new_source, indent=2)}, - 
additional_imports={[ast.dump(i, indent=2) for i in self.additional_imports]}, - add_trailing_comma={self.add_trailing_comma} + current_source={dump(self.current_source)}, + new_source={dump(self.new_source)}, + additional_imports={[dump(i) for i in self.additional_imports]}, ) """ +# ------------------------------------------------------------------------------------------ +# Call-by-name transformer +# ------------------------------------------------------------------------------------------ + + +class CallByNameTransformer(m.MatcherDecoratableTransformer): + def __init__(self, filename: PurePath, syntax_mapper: CallByNameSyntaxMapper) -> None: + super().__init__() + + self.filename = filename + self.syntax_mapper = syntax_mapper + self.calling_function: str = "" + self.additional_imports: list[cst.ImportFrom] = [] + self.unavailable_names: set[str] = set() + + def visit_FunctionDef(self, node: cst.FunctionDef) -> None: + self.calling_function = node.name.value + + def leave_FunctionDef( + self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef + ) -> cst.FunctionDef: + self.calling_function = "" + return updated_node + + @m.leave(m.Call(func=m.Name("Get"))) + def handle_get(self, original_node: cst.Call, updated_node: cst.Call) -> cst.Call: + replacement = self.syntax_mapper.map_get_to_new_syntax( + original_node, self.filename, self.calling_function + ) + if not replacement: + return updated_node + + replacement.sanitize(self.unavailable_names) + self.additional_imports.extend(replacement.sanitized_imports()) + return replacement.new_source + + @m.leave(m.Name("MultiGet")) + def handle_multiget(self, original_node: cst.Name, updated_node: cst.Name) -> cst.Name: + return updated_node.with_changes(value="concurrently") + + def visit_Module(self, node: cst.Module): + """Collects all names we risk shadowing.""" + wrapper = libcst.metadata.MetadataWrapper(module=node) + scopes = set(wrapper.resolve(libcst.metadata.ScopeProvider).values()) + self.unavailable_names = { + assignment.name for scope in scopes if scope for assignment in scope.assignments + } + + def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> cst.Module: + """Performs final updates on imports, and sanitization.""" + if not self.additional_imports: + return updated_node + + rules_import_index = 1 + for i, statement in enumerate(updated_node.body): + if m.matches( + statement, + matcher=m.SimpleStatementLine( + body=[ + m.ImportFrom( + module=cstutil.make_importfrom_attr_matcher("pants.engine.rules") + ) + ] + ), + ): + rules_import_index = i + 1 + break + + self.additional_imports.append(cstutil.make_importfrom("pants.engine.rules", "implicitly")) + additional_import_statements = [ + cst.SimpleStatementLine(body=[i]) for i in self.additional_imports + ] + additional_import_statements = [ + v1 + for i, v1 in enumerate(additional_import_statements) + if not any(v1.deep_equals(v2) for v2 in additional_import_statements[:i]) + ] + + return updated_node.with_changes( + body=[ + *updated_node.body[:rules_import_index], + *additional_import_statements, + *updated_node.body[rules_import_index:], + ] + ) + + # ------------------------------------------------------------------------------------------ # Call-by-name syntax mapping # ------------------------------------------------------------------------------------------ @@ -375,24 +367,17 @@ def __init__(self, graphs: list[RuleGraphGet]) -> None: self.graphs = graphs self.mapping: dict[ - tuple[int, type[ast.Call] | type[ast.Dict] | None], - 
Callable[[ast.Call, list[RuleGraphGetDep]], tuple[ast.Call, list[ast.ImportFrom]]], + tuple[int, type[cst.Call] | type[cst.Dict] | None], + Callable[[cst.Call, list[RuleGraphGetDep]], tuple[cst.Call, list[cst.ImportFrom]]], ] = { (1, None): self.map_no_args_get_to_new_syntax, - (2, ast.Call): self.map_short_form_get_to_new_syntax, - (2, ast.Dict): self.map_dict_form_get_to_new_syntax, + (2, cst.Call): self.map_short_form_get_to_new_syntax, + (2, cst.Dict): self.map_dict_form_get_to_new_syntax, (3, None): self.map_long_form_get_to_new_syntax, } - def map_get_to_new_syntax( - self, get: ast.Call, filename: PurePath, calling_func: str - ) -> Replacement | None: - """There are 4 forms the old Get() syntax can take, so we can account for each one of them - individually.""" - new_source: ast.Call | None = None - imports: list[ast.ImportFrom] = [] - - graph_item = next( + def _get_graph_item(self, filename: PurePath, calling_func: str) -> RuleGraphGet | None: + return next( ( item for item in self.graphs @@ -400,317 +385,191 @@ def map_get_to_new_syntax( ), None, ) - if not graph_item: - logger.warning(f"Failed to find dependencies for {filename} {calling_func}") + + def map_get_to_new_syntax( + self, get: cst.Call, filename: PurePath, calling_func: str + ) -> Replacement | None: + new_source: cst.Call | None = None + imports: list[cst.ImportFrom] = [] + + if not (graph_item := self._get_graph_item(filename, calling_func)): + logger.warning(f"Failed to find dependencies for {calling_func} in {filename}") return None get_deps = graph_item["gets"] - num_args = len(get.args) - arg_type: type[ast.Call] | type[ast.Dict] | None = None - if num_args == 2 and (arg := maybe_narrow_type(get.args[1], (ast.Call, ast.Dict))): - arg_type = type(arg) if arg else None # type: ignore + + arg_type = None + if num_args == 2 and (arg_type := type(get.args[1].value)) not in [cst.Call, cst.Dict]: + logger.warning(f"Failed to migrate: Unknown arg type {get.args[1]}") + return None try: - new_source, imports = self.mapping[(num_args, arg_type)](get, get_deps) + new_source, imports = self.mapping[(num_args, arg_type)](get, get_deps) # type: ignore except Exception as e: - logging.warning(f"Failed to migrate {ast.unparse(get)} with {e}\n") + logger.warning( + f"Failed to migrate Get ({num_args}, {arg_type}) in {filename}:{calling_func} due to: {e}\n" + ) return None - assert get.end_lineno is not None - assert get.end_col_offset is not None return Replacement( filename=filename, module=graph_item["module"], - line_range=(get.lineno, get.end_lineno), - col_range=(get.col_offset, get.end_col_offset), current_source=get, new_source=new_source, - additional_imports=[ - ast.ImportFrom( - module="pants.engine.rules", names=[ast.alias("implicitly")], level=0 - ), - *imports, - ], + additional_imports=imports, ) def map_no_args_get_to_new_syntax( - self, get: ast.Call, deps: list[RuleGraphGetDep] - ) -> tuple[ast.Call, list[ast.ImportFrom]]: - """Map the no-args form of Get() to the new syntax. 
+        self, get: cst.Call, deps: list[RuleGraphGetDep]
+    ) -> tuple[cst.Call, list[cst.ImportFrom]]:
+        """Get(<OutputType>) => the_rule_to_call(**implicitly())"""
 
-        The expected mapping is roughly:
-        Get(<OutputType>) -> the_rule_to_call(**implicitly())
-
-        This form expects that the `get` call has exactly 1 arg (otherwise, a different form would be used)
-        """
-
-        logger.debug(ast.dump(get, indent=2))
-        output_type = narrow_type(get.args[0], ast.Name)
+        output_type = cst.ensure_type(get.args[0].value, cst.Name).value
         dep = next(
             dep
             for dep in deps
-            if dep["output_type"]["name"] == output_type.id and not dep["input_types"]
+            if dep["output_type"]["name"] == output_type and not dep["input_types"]
         )
-        module = dep["rule_dep"]["module"]
-        new_function = dep["rule_dep"]["function"]
-
-        new_call = ast.Call(
-            func=ast.Name(id=new_function),
-            args=[],
-            keywords=[
-                ast.keyword(value=ast.Call(func=ast.Name(id="implicitly"), args=[], keywords=[]))
-            ],
+        module, new_function = dep["rule_dep"]["module"], dep["rule_dep"]["function"]
+
+        new_call = cst.Call(
+            func=cst.Name(new_function),
+            args=[cst.Arg(value=cst.Call(cst.Name("implicitly")), star="**")],
         )
-        imports = [ast.ImportFrom(module, names=[ast.alias(new_function)], level=0)]
+        if called_funcdef := cstutil.extract_functiondef_from_module(module, new_function):
+            new_call = remove_unused_implicitly(new_call, called_funcdef)
+
+        imports = [cstutil.make_importfrom(module, new_function)]
         return new_call, imports
 
     def map_long_form_get_to_new_syntax(
-        self, get: ast.Call, deps: list[RuleGraphGetDep]
-    ) -> tuple[ast.Call, list[ast.ImportFrom]]:
-        """Map the long form of Get() to the new syntax.
-
-        The expected mapping is roughly:
-        Get(<OutputType>, <InputType>, input) -> the_rule_to_call(**implicitly(input))
-
-        This form expects that the `get` call has exactly 3 args (otherwise, a different form would be used)
-        """
+        self, get: cst.Call, deps: list[RuleGraphGetDep]
+    ) -> tuple[cst.Call, list[cst.ImportFrom]]:
+        """Get(<OutputType>, <InputType>, input) => the_rule_to_call(**implicitly(input))"""
 
-        logger.debug(ast.dump(get, indent=2))
-        output_type = narrow_type(get.args[0], ast.Name)
-        input_type = narrow_type(get.args[1], ast.Name)
+        output_type = cst.ensure_type(get.args[0].value, cst.Name).value
+        input_type = cst.ensure_type(get.args[1].value, cst.Name).value
         dep = next(
             dep
             for dep in deps
-            if dep["output_type"]["name"] == output_type.id
+            if dep["output_type"]["name"] == output_type
             and len(dep["input_types"]) == 1
-            and dep["input_types"][0]["name"] == input_type.id
+            and dep["input_types"][0]["name"] == input_type
         )
-        module = dep["rule_dep"]["module"]
-        new_function = dep["rule_dep"]["function"]
-
-        new_call = ast.Call(
-            func=ast.Name(id=new_function),
-            args=[],
-            keywords=[
-                ast.keyword(
-                    value=ast.Call(
-                        func=ast.Name(id="implicitly"),
-                        args=[ast.Dict(keys=[get.args[2]], values=[ast.Name(id=input_type.id)])],
-                        keywords=[],
-                    )
+        module, new_function = dep["rule_dep"]["module"], dep["rule_dep"]["function"]
+
+        new_call = cst.Call(
+            func=cst.Name(new_function),
+            args=[
+                cst.Arg(
+                    value=cst.Call(
+                        func=cst.Name("implicitly"),
+                        args=[
+                            cst.Arg(
+                                value=cst.Dict(
+                                    [
+                                        cst.DictElement(
+                                            key=get.args[2].value, value=cst.Name(input_type)
+                                        )
+                                    ]
+                                )
+                            )
+                        ],
+                    ),
+                    star="**",
                 )
             ],
         )
-        imports = [ast.ImportFrom(module, names=[ast.alias(new_function)], level=0)]
+
+        imports = [cstutil.make_importfrom(module, new_function)]
         return new_call, imports
 
     def map_short_form_get_to_new_syntax(
-        self, get: ast.Call, deps: list[RuleGraphGetDep]
-    ) -> tuple[ast.Call, list[ast.ImportFrom]]:
-        """Map the short form of Get() to the new syntax.
+        self, get: cst.Call, deps: list[RuleGraphGetDep]
+    ) -> tuple[cst.Call, list[cst.ImportFrom]]:
+        """Get(<OutputType>, <InputType>(<constructor args>)) => the_rule_to_call(input, **implicitly())"""
 
-        The expected mapping is roughly:
-        Get(<OutputType>, <InputType>(<constructor args>)) -> the_rule_to_call(input, **implicitly())
-
-        This form expects that the `get` call has exactly 2 args (otherwise, a different form would be used)
-        """
-
-        logger.debug(ast.dump(get, indent=2))
-        output_type = narrow_type(get.args[0], ast.Name)
-        input_call = narrow_type(get.args[1], ast.Call)
-        input_type = narrow_type(input_call.func, ast.Name)
+        output_type = cst.ensure_type(get.args[0].value, cst.Name).value
+        input_call = cst.ensure_type(get.args[1].value, cst.Call)
+        input_type = cst.ensure_type(input_call.func, cst.Name).value
         dep = next(
             dep
             for dep in deps
-            if dep["output_type"]["name"] == output_type.id
+            if dep["output_type"]["name"] == output_type
             and len(dep["input_types"]) == 1
-            and dep["input_types"][0]["name"] == input_type.id
+            and dep["input_types"][0]["name"] == input_type
         )
-        module = dep["rule_dep"]["module"]
-        new_function = dep["rule_dep"]["function"]
-
-        new_call = ast.Call(
-            func=ast.Name(id=new_function),
-            args=[input_call],
-            keywords=[
-                ast.keyword(value=ast.Call(func=ast.Name(id="implicitly"), args=[], keywords=[]))
+        module, new_function = dep["rule_dep"]["module"], dep["rule_dep"]["function"]
+
+        new_call = cst.Call(
+            func=cst.Name(new_function),
+            args=[
+                cst.Arg(value=input_call),
+                cst.Arg(value=cst.Call(cst.Name("implicitly")), star="**"),
             ],
         )
-        imports = [ast.ImportFrom(module, names=[ast.alias(new_function)], level=0)]
+        imports = [cstutil.make_importfrom(module, new_function)]
         return new_call, imports
 
     def map_dict_form_get_to_new_syntax(
-        self, get: ast.Call, deps: list[RuleGraphGetDep]
-    ) -> tuple[ast.Call, list[ast.ImportFrom]]:
-        """Map the dict form of Get() to the new syntax.
-
-        The expected mapping is roughly:
-        Get(<OutputType>, {input1: <Input1Type>, ..inputN: <InputNType>}) -> the_rule_to_call(**implicitly(input))
-
-        This form expects that the `get` call has exactly 2 args (otherwise, a different form would be used)
-        """
-
-        logger.debug(ast.dump(get, indent=2))
-        output_type = narrow_type(get.args[0], ast.Name)
-        input_dict = narrow_type(get.args[1], ast.Dict)
-        input_types = {k.id for k in input_dict.values if isinstance(k, ast.Name)}
+        self, get: cst.Call, deps: list[RuleGraphGetDep]
+    ) -> tuple[cst.Call, list[cst.ImportFrom]]:
+        """Get(<OutputType>, {input1: <Input1Type>, ..inputN: <InputNType>}) =>
+        the_rule_to_call(**implicitly(input))"""
+
+        output_type = cst.ensure_type(get.args[0].value, cst.Name).value
+        input_dict = cst.ensure_type(get.args[1].value, cst.Dict)
+        input_types = {
+            element.value.value
+            for element in input_dict.elements
+            if isinstance(element.value, cst.Name)
+        }
         dep = next(
             dep
             for dep in deps
-            if dep["output_type"]["name"] == output_type.id
+            if dep["output_type"]["name"] == output_type
             and {i["name"] for i in dep["input_types"]} == input_types
         )
-
-        module = dep["rule_dep"]["module"]
-        new_function = dep["rule_dep"]["function"]
-
-        new_call = ast.Call(
-            func=ast.Name(id=new_function),
-            args=[],
-            keywords=[
-                ast.keyword(
-                    value=ast.Call(func=ast.Name(id="implicitly"), args=[input_dict], keywords=[])
+        module, new_function = dep["rule_dep"]["module"], dep["rule_dep"]["function"]
+
+        new_call = cst.Call(
+            func=cst.Name(new_function),
+            args=[
+                cst.Arg(
+                    value=cst.Call(func=cst.Name("implicitly"), args=[cst.Arg(input_dict)]),
+                    star="**",
                 )
             ],
         )
-        imports = [ast.ImportFrom(module, names=[ast.alias(new_function)], level=0)]
+
+        imports = [cstutil.make_importfrom(module, new_function)]
         return new_call, imports
 
 
 # ------------------------------------------------------------------------------------------
-# Call-by-name visitor
+# Implicitly helpers
 # ------------------------------------------------------------------------------------------
 
 
-class CallByNameVisitor(ast.NodeVisitor):
-    def __init__(self, filename: PurePath, syntax_mapper: CallByNameSyntaxMapper) -> None:
-        super().__init__()
-
-        self.filename = filename
-        self.syntax_mapper = syntax_mapper
-        self.names: set[str] = set()
-        self.replacements: list[Replacement] = []
-
-    def visit_Name(self, node: ast.Name):
-        """Collect all names in the file, so we can avoid shadowing them with the new imports."""
-        self.names.add(node.id)
-
-    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
-        """This function will determine if the node is a rule, if there are any Get(...) calls of
-        interest, and then call out to the syntax mapper to create the new syntax and imports for
-        the replacement."""
-
-        # Ensure we collect all names in this function, as well
-        names = [n.id for n in ast.walk(node) if isinstance(n, ast.Name)]
-        self.names.update(names)
+def remove_unused_implicitly(call: cst.Call, called_func: cst.FunctionDef) -> cst.Call:
+    """The CallByNameSyntaxMapper aggressively adds `implicitly` for safety. This function removes
+    unnecessary ones.
- if not self._should_visit_node(node.decorator_list): - return - - self._recurse_body_statements(node.name, node) + The following cases are handled: + - The called function takes no arguments + - TODO: The called function takes the same number of arguments that are passed to it + - TODO: Check the types of the passed in parameters, if they don't match, they need to be implicitly passed + """ + call_func_name = cst.ensure_type(call.func, cst.Name).value + if call_func_name != called_func.name.value: + return call - def _recurse_body_statements(self, root: str, node: ast.stmt): - """Recursively walk the body of a node, including properties of compound statements looking - for Get() calls to replace. - - https://docs.python.org/3/reference/compound_stmts.html - """ - for prop in ["body", "handlers", "orelse", "finalbody"]: - for child in getattr(node, prop, []): - self._recurse_body_statements(root, cast(ast.stmt, child)) - - self._maybe_add_replacements(root, node) - - def _maybe_add_replacements(self, calling_func: str, statement: ast.stmt): - if call := self._maybe_replaceable_call(statement): - if replacement := self.syntax_mapper.map_get_to_new_syntax( - call, self.filename, calling_func=calling_func - ): - self.replacements.append(replacement) - - for call, statement_type in self._maybe_replaceable_multiget(statement): - if replacement := self.syntax_mapper.map_get_to_new_syntax( - call, self.filename, calling_func=calling_func - ): - replacement.add_trailing_comma = statement_type == "call" - self.replacements.append(replacement) - - def _should_visit_node(self, decorator_list: list[ast.expr]) -> bool: - """Only interested in async functions with the @rule(...) or @goal_rule(...) decorator.""" - for decorator in decorator_list: - if isinstance(decorator, ast.Name) and decorator.id in ["rule", "goal_rule"]: - # Accounts for isolated decorators e.g. "@rule" - return True - - if ( - isinstance(decorator, ast.Call) - and isinstance(decorator.func, ast.Name) - and decorator.func.id in ["rule", "goal_rule"] - ): - # Accounts for decorators with params e.g. "@rule(desc=..., level=...)" - return True - - return False - - def _maybe_replaceable_call(self, statement: ast.stmt) -> ast.Call | None: - """Looks for the following forms of Get that we want to replace: - - - bar_get = Get(...) - - bar = await Get(...) - This function is pretty gross, but a lot of it is to satisfy the type checker and - to not need to try/catch everywhere. - """ - if ( - isinstance(statement, ast.Assign) - and ( - isinstance((call_node := statement.value), ast.Call) - or ( - isinstance((await_node := statement.value), ast.Await) - and isinstance((call_node := await_node.value), ast.Call) - ) - ) - and isinstance(call_node.func, ast.Name) - and call_node.func.id == "Get" - ): - return call_node - return None - - def _maybe_replaceable_multiget( - self, statement: ast.stmt - ) -> list[tuple[ast.Call, Literal["call", "generator"]]]: - """Looks for the following forms of MultiGet that we want to replace: - - - multigot = await MultiGet(Get(...), Get(...), ...) - - multigot = await MultiGet(Get(...) 
for x in y) - """ - if not ( - isinstance(statement, ast.Assign) - and isinstance((await_node := statement.value), ast.Await) - and isinstance((call_node := await_node.value), ast.Call) - and isinstance(call_node.func, ast.Name) - and call_node.func.id == "MultiGet" - ): - return [] - - args: list[tuple[ast.Call, Literal["call", "generator"]]] = [] - for arg in call_node.args: - if ( - isinstance(arg, ast.Call) - and isinstance(arg.func, ast.Name) - and arg.func.id == "Get" - ): - args.append((arg, "call")) - - if ( - isinstance(arg, ast.GeneratorExp) - and isinstance((call_arg := arg.elt), ast.Call) - and isinstance(call_arg.func, ast.Name) - and call_arg.func.id == "Get" - ): - args.append((call_arg, "generator")) - return args + called_params = len(called_func.params.params) + if called_params == 0: + return call.with_changes(args=[]) + return call diff --git a/src/python/pants/goal/migrate_call_by_name_integration_test.py b/src/python/pants/goal/migrate_call_by_name_integration_test.py index 0b21d08b5eb..eb0a27eb617 100644 --- a/src/python/pants/goal/migrate_call_by_name_integration_test.py +++ b/src/python/pants/goal/migrate_call_by_name_integration_test.py @@ -14,7 +14,7 @@ from pants.engine.rules import collect_rules, Get, rule, goal_rule from pants.engine.goal import Goal, GoalSubsystem - from migrateme.rules1 import Bar, Baz, Foo, Thud, rules as rules1 + from migrateme.rules1 import Bar, Foo, Thud, rules as rules1 from migrateme.rules2 import Qux, rules as rules2 class ContrivedGoalSubsystem(GoalSubsystem): @@ -29,7 +29,6 @@ class ContrivedGoal(Goal): async def setup_migrateme(black: Black) -> ContrivedGoal: foo = await Get(Foo, Black, black) bar = await Get(Bar, Black, black) - baz = await Get(Baz, Black, black) qux = await Get(Qux, Black, black) thud = await Get(Thud, Black, black) @@ -42,15 +41,14 @@ def rules(): """\ from pants.backend.python.lint.black.subsystem import Black from pants.engine.rules import collect_rules, Get, rule, goal_rule - from migrateme.rules1 import embedded_comments - from migrateme.rules1 import multiget - from migrateme.rules1 import multiline from migrateme.rules1 import variants + from migrateme.rules1 import multiline from migrateme.rules2 import shadowed + from migrateme.rules1 import multiget from pants.engine.rules import implicitly from pants.engine.goal import Goal, GoalSubsystem - from migrateme.rules1 import Bar, Baz, Foo, Thud, rules as rules1 + from migrateme.rules1 import Bar, Foo, Thud, rules as rules1 from migrateme.rules2 import Qux, rules as rules2 class ContrivedGoalSubsystem(GoalSubsystem): @@ -65,7 +63,6 @@ class ContrivedGoal(Goal): async def setup_migrateme(black: Black) -> ContrivedGoal: foo = await variants(**implicitly({black: Black})) bar = await multiline(**implicitly({black: Black})) - baz = await embedded_comments(**implicitly({black: Black})) qux = await shadowed(**implicitly({black: Black})) thud = await multiget(**implicitly({black: Black})) @@ -86,6 +83,15 @@ def rules(): from pants.engine.rules import collect_rules, Get, MultiGet, rule from pants.engine.target import AllTargets + # TODO: Not handled by rule-graph + async def non_annotated_rule_helpers(): + await Get( + VenvPex, + PexRequest, + # Some comment that the AST parse wipes out, but CST should keep + black.to_pex_request() + ) + class Foo: pass @@ -94,7 +100,7 @@ async def variants(black: Black, local_env: ChosenLocalEnvironmentName) -> Foo: all_targets = await Get(AllTargets) pex = await Get(VenvPex, PexRequest, black.to_pex_request()) digest = await 
Get(Digest, CreateArchive(EMPTY_SNAPSHOT)) - paths = await Get(BinaryPaths, {{BinaryPathRequest(binary_name="time", search_path=("/usr/bin")): BinaryPathRequest, local_env.val: EnvironmentName}}) + paths = await Get(BinaryPaths, {{BinaryPathRequest(binary_name='time', search_path=['/usr/bin']): BinaryPathRequest, local_env.val: EnvironmentName}}) try: try_all_targets_try = await Get(AllTargets) except: @@ -109,6 +115,7 @@ async def variants(black: Black, local_env: ChosenLocalEnvironmentName) -> Foo: conditional_all_targets_elif = await Get(AllTargets) else: conditional_all_targets_else = await Get(AllTargets) + await non_annotated_rule_helpers() class Bar: pass @@ -121,18 +128,6 @@ async def multiline(black: Black) -> Bar: black.to_pex_request() ) - class Baz: - pass - - @rule(desc="Ensure calls with embedded comments are ignored") - async def embedded_comments(black: Black) -> Baz: - pex = await Get( - VenvPex, - PexRequest, - # Some comment that the AST parse wipes out, so we can't migrate this call safely - black.to_pex_request() - ) - class Thud: pass @@ -150,6 +145,14 @@ async def multiget(black: Black) -> Thud: ), digest_get ) + multigot_sameline = await MultiGet( + Get(AllTargets), all_targets_get, + Get( + VenvPex, + PexRequest, + black.to_pex_request() + ), digest_get + ) multigot_forloop = await MultiGet( Get(Digest, CreateArchive(EMPTY_SNAPSHOT)) for i in [0, 1, 2] @@ -170,36 +173,46 @@ def rules(): from pants.engine.environment import ChosenLocalEnvironmentName, EnvironmentName from pants.engine.fs import Digest, EMPTY_SNAPSHOT from pants.engine.rules import collect_rules, Get, concurrently, rule + from pants.engine.internals.graph import find_all_targets from pants.backend.python.util_rules.pex import create_venv_pex from pants.core.util_rules.archive import create_archive from pants.core.util_rules.system_binaries import find_binary - from pants.engine.internals.graph import find_all_targets from pants.engine.rules import implicitly from pants.engine.target import AllTargets + # TODO: Not handled by rule-graph + async def non_annotated_rule_helpers(): + await Get( + VenvPex, + PexRequest, + # Some comment that the AST parse wipes out, but CST should keep + black.to_pex_request() + ) + class Foo: pass @rule async def variants(black: Black, local_env: ChosenLocalEnvironmentName) -> Foo: - all_targets = await find_all_targets(**implicitly()) + all_targets = await find_all_targets() pex = await create_venv_pex(**implicitly({black.to_pex_request(): PexRequest})) digest = await create_archive(CreateArchive(EMPTY_SNAPSHOT), **implicitly()) - paths = await find_binary(**implicitly({BinaryPathRequest(binary_name='time', search_path='/usr/bin'): BinaryPathRequest, local_env.val: EnvironmentName})) + paths = await find_binary(**implicitly({BinaryPathRequest(binary_name='time', search_path=['/usr/bin']): BinaryPathRequest, local_env.val: EnvironmentName})) try: - try_all_targets_try = await find_all_targets(**implicitly()) + try_all_targets_try = await find_all_targets() except: - try_all_targets_except = await find_all_targets(**implicitly()) + try_all_targets_except = await find_all_targets() else: - try_all_targets_else = await find_all_targets(**implicitly()) + try_all_targets_else = await find_all_targets() finally: - try_all_targets_finally = await find_all_targets(**implicitly()) + try_all_targets_finally = await find_all_targets() if True: - conditional_all_targets_if = await find_all_targets(**implicitly()) + conditional_all_targets_if = await find_all_targets() elif False: - 
conditional_all_targets_elif = await find_all_targets(**implicitly()) + conditional_all_targets_elif = await find_all_targets() else: - conditional_all_targets_else = await find_all_targets(**implicitly()) + conditional_all_targets_else = await find_all_targets() + await non_annotated_rule_helpers() class Bar: pass @@ -208,31 +221,23 @@ class Bar: async def multiline(black: Black) -> Bar: pex = await create_venv_pex(**implicitly({black.to_pex_request(): PexRequest})) - class Baz: - pass - - @rule(desc="Ensure calls with embedded comments are ignored") - async def embedded_comments(black: Black) -> Baz: - pex = await Get( - VenvPex, - PexRequest, - # Some comment that the AST parse wipes out, so we can't migrate this call safely - black.to_pex_request() - ) - class Thud: pass @rule(desc="Ensure calls used with multiget are migrated") async def multiget(black: Black) -> Thud: - all_targets_get = find_all_targets(**implicitly()) + all_targets_get = find_all_targets() digest_get = create_archive(CreateArchive(EMPTY_SNAPSHOT), **implicitly()) multigot = await concurrently( - find_all_targets(**implicitly()), + find_all_targets(), all_targets_get, create_venv_pex(**implicitly({black.to_pex_request(): PexRequest})), digest_get ) + multigot_sameline = await concurrently( + find_all_targets(), all_targets_get, + create_venv_pex(**implicitly({black.to_pex_request(): PexRequest})), digest_get + ) multigot_forloop = await concurrently( create_archive(CreateArchive(EMPTY_SNAPSHOT), **implicitly()) for i in [0, 1, 2] @@ -310,13 +315,6 @@ def test_migrate_call_by_name_syntax(): # Ensure the JSON output contains the paths to the files we expect to migrate assert all(str(p) in result.stdout for p in [register_path, rules1_path, rules2_path]) - # Ensure the warning for embedded comments is logged - # Note: This assertion is brittle - adding extra content to rules1.py will probably mean updating the range - assert ( - f"Comments found in {tmpdir}/src/migrateme/rules1.py within replacement range: (51, 56)" - in result.stderr - ) - with open(register_path) as f: assert f.read() == MIGRATED_REGISTER_FILE with open(rules1_path) as f: diff --git a/src/python/pants/goal/migrate_call_by_name_test.py b/src/python/pants/goal/migrate_call_by_name_test.py index 52665659f46..85536d49f0b 100644 --- a/src/python/pants/goal/migrate_call_by_name_test.py +++ b/src/python/pants/goal/migrate_call_by_name_test.py @@ -3,26 +3,31 @@ from __future__ import annotations -import ast from copy import deepcopy from pathlib import PurePath -from typing import Final +from typing import Final, Iterable -from pants.goal.migrate_call_by_name import Replacement +import libcst as cst + +from pants.goal.migrate_call_by_name import Replacement, remove_unused_implicitly +from pants.util.cstutil import make_importfrom_attr as import_attr OLD_FUNC_NAME: Final[str] = "hello" NEW_FUNC_NAME: Final[str] = "goodbye" +IMPORT_ATTR_ENGINE: Final[cst.Attribute | cst.Name] = import_attr("pants.engine.rules") +IMPORT_ATTR_GREETING: Final[cst.Attribute | cst.Name] = import_attr("pants.greeting") + DEFAULT_REPLACEMENT: Final[Replacement] = Replacement( filename=PurePath("pants/foo/bar.py"), module="pants.foo.bar", - line_range=(1, 3), - col_range=(3, 4), - current_source=ast.Call(func=ast.Name(id=OLD_FUNC_NAME), args=[], keywords=[]), - new_source=ast.Call(func=ast.Name(id=NEW_FUNC_NAME), args=[], keywords=[]), + current_source=cst.Call(func=cst.Name(OLD_FUNC_NAME)), + new_source=cst.Call(func=cst.Name(NEW_FUNC_NAME)), additional_imports=[ - 
ast.ImportFrom(module="pants.engine.rules", names=[ast.alias("implicitly")], level=0), - ast.ImportFrom(module="pants.greeting", names=[ast.alias(NEW_FUNC_NAME)], level=0), + cst.ImportFrom(module=IMPORT_ATTR_ENGINE, names=[cst.ImportAlias(cst.Name("implicitly"))]), + cst.ImportFrom( + module=IMPORT_ATTR_GREETING, names=[cst.ImportAlias(cst.Name(NEW_FUNC_NAME))] + ), ], ) @@ -30,29 +35,33 @@ def test_replacement_sanitizes_circular_imports(): replacement = deepcopy(DEFAULT_REPLACEMENT) replacement.additional_imports.append( - ast.ImportFrom(module="pants.foo.bar", names=[ast.alias("baz")], level=0) + cst.ImportFrom( + module=import_attr("pants.foo.bar"), names=[cst.ImportAlias(cst.Name("baz"))] + ) ) sanitized_imports = replacement.sanitized_imports() assert len(sanitized_imports) == 2 - assert sanitized_imports[0].module == "pants.engine.rules" - assert sanitized_imports[1].module == "pants.greeting" + assert sanitized_imports[0].module is not None + assert sanitized_imports[0].module.deep_equals(IMPORT_ATTR_ENGINE) + assert sanitized_imports[1].module is not None + assert sanitized_imports[1].module.deep_equals(IMPORT_ATTR_GREETING) def test_replacement_sanitize_noop(): replacement = deepcopy(DEFAULT_REPLACEMENT) - replacement.sanitize(names=set()) + replacement.sanitize(unavailable_names=set()) assert str(replacement) == str(DEFAULT_REPLACEMENT) - replacement.sanitize(names={"fake_name", "irrelevant_name"}) + replacement.sanitize(unavailable_names={"fake_name", "irrelevant_name"}) assert str(replacement) == str(DEFAULT_REPLACEMENT) def test_replacement_sanitize_noop_in_same_module(): replacement = deepcopy(DEFAULT_REPLACEMENT) replacement.additional_imports = [] - replacement.sanitize(names={NEW_FUNC_NAME}) + replacement.sanitize(unavailable_names={NEW_FUNC_NAME}) unsanitized_replacement = deepcopy(DEFAULT_REPLACEMENT) unsanitized_replacement.additional_imports = [] @@ -62,46 +71,46 @@ def test_replacement_sanitize_noop_in_same_module(): def test_replacement_sanitizes_shadowed_code(): replacement = deepcopy(DEFAULT_REPLACEMENT) - replacement.sanitize(names={NEW_FUNC_NAME}) + replacement.sanitize(unavailable_names={NEW_FUNC_NAME}) assert str(replacement) != str(DEFAULT_REPLACEMENT) - assert isinstance(replacement.new_source.func, ast.Name) - assert replacement.new_source.func.id == f"{NEW_FUNC_NAME}_get" - assert replacement.additional_imports[1].module == "pants.greeting" - assert replacement.additional_imports[1].names[0].name == f"{NEW_FUNC_NAME}" - assert replacement.additional_imports[1].names[0].asname == f"{NEW_FUNC_NAME}_get" + assert isinstance(replacement.new_source.func, cst.Name) + assert replacement.new_source.func.value == f"{NEW_FUNC_NAME}_get" -def test_replacement_comments_are_flagged(): - assert not DEFAULT_REPLACEMENT.contains_comments("await Get(args)") - assert DEFAULT_REPLACEMENT.contains_comments("# A comment") + imp = replacement.additional_imports[1] + assert imp.module is not None + assert imp.module.deep_equals(IMPORT_ATTR_GREETING) - assert not DEFAULT_REPLACEMENT.contains_comments( - """ - await Get( - args - ) - """ - ) - assert DEFAULT_REPLACEMENT.contains_comments( - """ - await Get( # A comment in the replacement range - args - ) - """ - ) - assert DEFAULT_REPLACEMENT.contains_comments( - """ - await Get( - # A comment in the replacement range - args) - """ + assert isinstance(imp.names, Iterable) + assert imp.names[0].name.deep_equals(cst.Name(NEW_FUNC_NAME)) + assert imp.names[0].asname is not None + assert 
imp.names[0].asname.deep_equals(cst.AsName(cst.Name(f"{NEW_FUNC_NAME}_get")))
+
+
+def test_remove_unused_implicitly_noop():
+    call = cst.Call(
+        func=cst.Name("do_foo"), args=[cst.Arg(cst.Call(func=cst.Name("implicitly")), star="**")]
     )
+    called_func = _default_func_def()
 
-    assert not DEFAULT_REPLACEMENT.contains_comments(
-        """
-        await Get(
-            args
-        )
-        # A comment below the replacement range
-        """
+    new_call = remove_unused_implicitly(call, called_func)
+    assert new_call.deep_equals(call)
     )
+
+
+def test_remove_unused_implicitly():
+    call = cst.Call(
+        func=cst.Name("do_foo"), args=[cst.Arg(cst.Call(func=cst.Name("implicitly")), star="**")]
     )
+    called_func = _default_func_def().with_changes(name=cst.Name("do_foo"))
+
+    new_call = remove_unused_implicitly(call, called_func)
+    assert not new_call.deep_equals(call)
+    assert len(new_call.args) == 0
+
+
+def _default_func_def() -> cst.FunctionDef:
+    return cst.FunctionDef(
+        name=cst.Name("REPLACE_ME"),
+        params=cst.Parameters(),
+        body=cst.IndentedBlock(body=[cst.SimpleStatementLine([cst.Pass()])]),
+    ).deep_clone()
diff --git a/src/python/pants/util/astutil.py b/src/python/pants/util/astutil.py
deleted file mode 100644
index 54f0e00a9f5..00000000000
--- a/src/python/pants/util/astutil.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
-# Licensed under the Apache License, Version 2.0 (see LICENSE).
-
-
-from __future__ import annotations
-
-import ast
-
-import pytest
-
-from pants.util.astutil import maybe_narrow_type, narrow_type
-
-
-def test_narrow_type():
-    value = ast.Call(func=ast.Name(id="helloworld"), args=[], keywords=[])
-
-    assert narrow_type(value, ast.Call) == value
-    with pytest.raises(ValueError):
-        narrow_type(value, ast.Name)
-
-    assert narrow_type(value, (ast.Call, ast.Dict)) == value
-    with pytest.raises(ValueError):
-        narrow_type(value, (ast.Constant, ast.Name))
-
-
-def test_maybe_narrow_type():
-    value = ast.Call(func=ast.Name(id="helloworld"), args=[], keywords=[])
-
-    assert maybe_narrow_type(value, ast.Call) == value
-    assert maybe_narrow_type(value, ast.Name) is None
-
-    assert maybe_narrow_type(value, (ast.Call, ast.Dict)) == value
-    assert maybe_narrow_type(value, (ast.Constant, ast.Name)) is None
diff --git a/src/python/pants/util/cstutil.py b/src/python/pants/util/cstutil.py
new file mode 100644
index 00000000000..0f17cd1161c
--- /dev/null
+++ b/src/python/pants/util/cstutil.py
@@ -0,0 +1,54 @@
+# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+from __future__ import annotations
+
+import importlib.util
+import logging
+from typing import Union
+
+import libcst as cst
+import libcst.matchers as m
+
+logger = logging.getLogger(__name__)
+
+
+def make_importfrom(module: str, func: str) -> cst.ImportFrom:
+    """Generates a cst.ImportFrom from a module and function string."""
+    return cst.ImportFrom(
+        module=make_importfrom_attr(module), names=[cst.ImportAlias(cst.Name(func))]
+    )
+
+
+def make_importfrom_attr(module: str) -> cst.Attribute | cst.Name:
+    """Generates a cst.Attribute or cst.Name from a module string."""
+    parts = module.split(".")
+    if len(parts) == 1:
+        return cst.Name(parts[0])
+
+    partial_module = ".".join(parts[:-1])
+    return cst.Attribute(value=make_importfrom_attr(partial_module), attr=cst.Name(parts[-1]))
+
+
+def make_importfrom_attr_matcher(module: str) -> Union[m.Attribute, m.Name]:
+    """Generates a cst matcher.Attribute or matcher.Name from a module string."""
+    parts = module.split(".")
+    if len(parts) == 1:
+        return m.Name(parts[0])
+
+    partial_module = ".".join(parts[:-1])
+    return m.Attribute(value=make_importfrom_attr_matcher(partial_module), attr=m.Name(parts[-1]))
+
+
+def extract_functiondef_from_module(module: str, func: str) -> cst.FunctionDef | None:
+    """Parse the file associated with the module and return `func` as a FunctionDef."""
+    if not (spec := importlib.util.find_spec(module)):
+        logger.warning(f"Failed to find module {module}")
+        return None
+
+    assert spec.origin is not None
+    with open(spec.origin) as f:
+        source_code = f.read()
+    tree = cst.parse_module(source_code)
+    results = m.findall(tree, matcher=m.FunctionDef(m.Name(func)))
+    return cst.ensure_type(results[0], cst.FunctionDef) if results else None
diff --git a/src/python/pants/util/cstutil_test.py b/src/python/pants/util/cstutil_test.py
new file mode 100644
index 00000000000..61d8a78ceff
--- /dev/null
+++ b/src/python/pants/util/cstutil_test.py
@@ -0,0 +1,44 @@
+# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+ +from __future__ import annotations + +import libcst as cst +import libcst.matchers as m + +from pants.util.cstutil import make_importfrom, make_importfrom_attr, make_importfrom_attr_matcher + + +def test_make_importfrom(): + imp = make_importfrom("pants.engine.rules", "collect_rules") + assert imp.deep_equals( + cst.ImportFrom( + module=cst.Attribute( + value=cst.Attribute( + value=cst.Name("pants"), + attr=cst.Name("engine"), + ), + attr=cst.Name("rules"), + ), + names=[cst.ImportAlias(name=cst.Name("collect_rules"))], + ) + ) + + +def test_make_importfrom_attr(): + attr = make_importfrom_attr("pants.engine.rules") + assert attr.deep_equals( + cst.Attribute( + value=cst.Attribute( + value=cst.Name("pants"), + attr=cst.Name("engine"), + ), + attr=cst.Name("rules"), + ) + ) + + +def test_make_importfrom_attr_matcher(): + node = make_importfrom_attr("pants.engine.rules") + assert m.matches(node, make_importfrom_attr_matcher("pants.engine.rules")) + assert not m.matches(node, make_importfrom_attr_matcher("pants.engine.rulez"))
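A closing illustration of the pattern the series above adopts. This is a sketch only, not part of any patch: the `RenameMultiGet` class and the `my_rule` source string are hypothetical, and it assumes the `libcst==1.4.0` pin from `3rdparty/python/requirements.txt` is installed. A `MatcherDecoratableTransformer` rewrites matched nodes while round-tripping all surrounding trivia, which is why [PATCH 10/10] can migrate `Get` calls that contain embedded comments, the case the old `ast`-based code had to skip:

```python
# Sketch: rename `MultiGet` to `concurrently`, roughly what the patch's
# handle_multiget matcher does, and show that comments survive the round trip.
import libcst as cst
import libcst.matchers as m


class RenameMultiGet(m.MatcherDecoratableTransformer):
    @m.leave(m.Name("MultiGet"))
    def rename(self, original_node: cst.Name, updated_node: cst.Name) -> cst.Name:
        # libcst nodes are immutable; with_changes returns an updated copy.
        return updated_node.with_changes(value="concurrently")


source = (
    "async def my_rule():\n"
    "    return await MultiGet(\n"
    "        Get(A),  # this comment survives the rewrite\n"
    "        Get(B),\n"
    "    )\n"
)
print(cst.parse_module(source).visit(RenameMultiGet()).code)
```

Running this prints the same source with `MultiGet` replaced by `concurrently` and the inline comment intact; an `ast.parse`/`ast.unparse` round trip would have dropped the comment, which is exactly the limitation the deleted `embedded_comments` fixture in the integration test documented.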