diff --git a/src/python/pants/engine/query.py b/src/python/pants/engine/query.py
new file mode 100644
index 000000000000..41f4a36967ec
--- /dev/null
+++ b/src/python/pants/engine/query.py
@@ -0,0 +1,281 @@
+# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+import ast
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Dict, Tuple, Type
+
+from pants.engine.addresses import Addresses
+from pants.engine.internals.graph import Owners, OwnersRequest
+from pants.engine.rules import RootRule, rule
+from pants.engine.selectors import Get
+from pants.engine.unions import UnionMembership, UnionRule, union
+from pants.scm.subsystems.changed import (
+    ChangedAddresses,
+    ChangedOptions,
+    ChangedRequest,
+    DependeesOption,
+    UncachedScmWrapper,
+)
+from pants.util.meta import classproperty
+
+
+@union
+class QueryComponent(ABC):
+    """A function which can be invoked from a `--query` expression to select targets."""
+
+    @classproperty
+    @abstractmethod
+    def function_name(cls):
+        """The name by which this component is invoked in a query expression.
+
+        If the user provides `--query="<name>(<args...>)"` on the command line and `<name>`
+        matches this property, `.parse_from_args()` is invoked with the literal values of
+        `<args...>`.
+        """
+
+    @classmethod
+    @abstractmethod
+    def parse_from_args(cls, *args, **kwargs):
+        """Create an instance of this class from the literal arguments of a query expression.
+
+        This method should raise an error if the args are incorrect or invalid.
+        """
+
+
+@dataclass(frozen=True)
+class QueryAddresses:
+    """The addresses which a single query component resolved to."""
+
+    addresses: Addresses
+
+
+@dataclass(frozen=True)
+class OwnerOf(QueryComponent):
+    """Query for the owners of the given files, e.g. `owner_of('a/b.py', 'c/d.py')`."""
+
+    files: Tuple[str, ...]
+
+    function_name = "owner_of"
+
+    @classmethod
+    def parse_from_args(cls, *args):
+        return cls(files=tuple(str(f) for f in args))
+
+
+@rule
+async def owner_of_request(owner_of: OwnerOf) -> QueryAddresses:
+    request = OwnersRequest(sources=owner_of.files)
+    owners = await Get(Owners, OwnersRequest, request)
+    return QueryAddresses(Addresses(bfa.to_address() for bfa in owners.addresses))
+
+
+def _changed_request(
+    scm_wrapper: UncachedScmWrapper, changed_options: ChangedOptions
+) -> ChangedRequest:
+    """Build the request for the files which `changed_options` selects from the wrapped SCM."""
+    return ChangedRequest(
+        sources=tuple(changed_options.changed_files(scm=scm_wrapper.scm)),
+        dependees=changed_options.dependees,
+    )
+
+
+@dataclass(frozen=True)
+class ChangesSince(QueryComponent):
+    """Query for the changes reported by `ChangedOptions` for a `since` value."""
+
+    since: str
+    dependees: DependeesOption
+
+    function_name = "since"
+
+    @classmethod
+    def parse_from_args(cls, since, dependees=DependeesOption.NONE):
+        return cls(since=str(since), dependees=DependeesOption(dependees))
+
+
+@rule
+async def since_request(scm_wrapper: UncachedScmWrapper, since: ChangesSince) -> QueryAddresses:
+    changed_options = ChangedOptions(since=since.since, diffspec=None, dependees=since.dependees)
+    request = _changed_request(scm_wrapper, changed_options)
+    changed = await Get(ChangedAddresses, ChangedRequest, request)
+    return QueryAddresses(changed.addresses)
+
+
+@dataclass(frozen=True)
+class ChangesForDiffspec(QueryComponent):
+    """Query for the changes reported by `ChangedOptions` for a `diffspec` value."""
+
+    diffspec: str
+    dependees: DependeesOption
+
+    function_name = "changes_for_diffspec"
+
+    @classmethod
+    def parse_from_args(cls, diffspec, dependees=DependeesOption.NONE):
+        return cls(diffspec=str(diffspec), dependees=DependeesOption(dependees))
+
+
+@rule
+async def changes_for_diffspec_request(
+    scm_wrapper: UncachedScmWrapper, changes_for_diffspec: ChangesForDiffspec,
+) -> QueryAddresses:
+    changed_options = ChangedOptions(
+        since=None,
+        diffspec=changes_for_diffspec.diffspec,
+        dependees=changes_for_diffspec.dependees,
+    )
+    request = _changed_request(scm_wrapper, changed_options)
+    changed = await Get(ChangedAddresses, ChangedRequest, request)
+    return QueryAddresses(changed.addresses)
+
+
+@dataclass(frozen=True)
+class KnownQueryExpressions:
+    """The registered `QueryComponent` union members, keyed by their `function_name`."""
+
+    components: Dict[str, Type[QueryComponent]]
+
+
+@rule
+def known_query_expressions(union_membership: UnionMembership) -> KnownQueryExpressions:
+    return KnownQueryExpressions(
+        {
+            union_member.function_name: union_member
+            for union_member in union_membership[QueryComponent]
+        }
+    )
+
+
+@dataclass(frozen=True)
+class QueryParseInput:
+    """A single raw `--query` expression string, before parsing."""
+
+    expr: str
+
+
+class QueryParseError(Exception):
+    """Raised when a query expression cannot be parsed into a known query component."""
+
+
+@dataclass(frozen=True)
+class QueryComponentWrapper:
+    """Wraps a parsed `QueryComponent`, since rules cannot yet return an @union directly."""
+
+    underlying: QueryComponent
+
+
+@dataclass(frozen=True)
+class ParsedPythonesqueFunctionCall:
+    """Representation of a limited form of python named function calls."""
+
+    function_name: str
+    positional_args: Tuple[Any, ...]
+    keyword_args: Dict[str, Any]
+
+
+def _parse_python_arg(arg_value: ast.AST) -> Any:
+    """Convert an AST node for the argument of a function call into its literal value.
+
+    :raises QueryParseError: if the argument is not a python literal.
+    """
+    try:
+        return ast.literal_eval(arg_value)
+    except (TypeError, ValueError) as e:
+        raise QueryParseError(
+            f"Query arguments must be python literals, but received: {ast.dump(arg_value)}."
+        ) from e
+
+
+def _parse_python_esque_function_call(expr: str) -> ParsedPythonesqueFunctionCall:
+    """Parse a string into a description of a python function call expression.
+
+    :raises QueryParseError: if `expr` is not exactly one call of a plainly-named function
+                             with literal arguments.
+    """
+    try:
+        # mode="eval" accepts exactly one expression, so empty input, statements, and multiple
+        # statements are rejected here rather than everything after the first being dropped.
+        query_expression = ast.parse(expr.strip(), mode="eval").body
+    except (SyntaxError, ValueError) as e:
+        raise QueryParseError(f"Error parsing query expression: {e}") from e
+
+    if not isinstance(query_expression, ast.Call):
+        type_name = type(query_expression).__name__
+        raise QueryParseError(
+            f"Query expression must be a single function call, but received {type_name}: "
+            f"{ast.dump(query_expression)}."
+        )
+
+    func_expr = query_expression.func
+    if not isinstance(func_expr, ast.Name):
+        raise QueryParseError(
+            "Function call in query expression should just be a name, but "
+            f"received {type(func_expr).__name__}: {ast.dump(func_expr)}."
+        )
+
+    if any(keyword.arg is None for keyword in query_expression.keywords):
+        # `f(**kwargs)` parses to a keyword with no name, which has no literal equivalent.
+        raise QueryParseError("`**` unpacking is not supported in query expressions.")
+
+    return ParsedPythonesqueFunctionCall(
+        function_name=func_expr.id,
+        positional_args=tuple(_parse_python_arg(arg) for arg in query_expression.args),
+        keyword_args={
+            keyword.arg: _parse_python_arg(keyword.value)
+            for keyword in query_expression.keywords
+        },
+    )
+
+
+# FIXME: allow returning an @union!!!
+@rule
+def parse_query_expr(s: QueryParseInput, known: KnownQueryExpressions) -> QueryComponentWrapper:
+    """Parse the input string and attempt to find a query function matching the function call.
+
+    :return: A query component which can be resolved into `QueryAddresses` in the v2 engine.
+    :raises QueryParseError: if the expression cannot be parsed, names an unknown function,
+                             or passes arguments which that function rejects.
+    """
+    try:
+        parsed_function_call = _parse_python_esque_function_call(s.expr)
+    except QueryParseError as e:
+        raise QueryParseError(f"Error parsing expression {s}: {e}.") from e
+
+    name = parsed_function_call.function_name
+    selected_function = known.components.get(name)
+    if selected_function is None:
+        known_names = ", ".join(sorted(known.components))
+        raise QueryParseError(
+            f"Query function with name {name} not found (in expr {s})! "
+            f"The known functions are: {known_names}."
+        )
+
+    try:
+        component = selected_function.parse_from_args(
+            *parsed_function_call.positional_args, **parsed_function_call.keyword_args
+        )
+    except (TypeError, ValueError) as e:
+        # E.g. the wrong number of arguments, or a `dependees` value `DependeesOption` rejects.
+        raise QueryParseError(
+            f"Invalid arguments for query function {name} (in expr {s}): {e}."
+        ) from e
+    return QueryComponentWrapper(component)
+
+
+def rules():
+    return [
+        RootRule(OwnerOf),
+        RootRule(ChangesSince),
+        RootRule(QueryParseInput),
+        RootRule(ChangesForDiffspec),
+        known_query_expressions,
+        UnionRule(QueryComponent, OwnerOf),
+        UnionRule(QueryComponent, ChangesSince),
+        UnionRule(QueryComponent, ChangesForDiffspec),
+        owner_of_request,
+        since_request,
+        changes_for_diffspec_request,
+        parse_query_expr,
+    ]
diff --git a/src/python/pants/init/extension_loader.py b/src/python/pants/init/extension_loader.py
index a9dc3e70ce28..d6020a3fd0b4 100644
--- a/src/python/pants/init/extension_loader.py
+++ b/src/python/pants/init/extension_loader.py
@@ -9,6 +9,7 @@
 
 from pants.base.exceptions import BackendConfigurationError
 from pants.build_graph.build_configuration import BuildConfiguration
+from pants.engine.query import rules as query_rules
 from pants.util.ordered_set import FrozenOrderedSet
 
 
@@ -113,6 +114,8 @@ def load_build_configuration_from_source(
     for backend_package in backend_packages:
         load_backend(build_configuration, backend_package)
 
+    build_configuration.register_rules(query_rules())
+
 
 def load_backend(build_configuration: BuildConfiguration.Builder, backend_package: str) -> None:
     """Installs the given backend package into the build configuration.
diff --git a/src/python/pants/init/specs_calculator.py b/src/python/pants/init/specs_calculator.py
index 5ff3d3cef793..5d474fd27e21 100644
--- a/src/python/pants/init/specs_calculator.py
+++ b/src/python/pants/init/specs_calculator.py
@@ -17,10 +17,16 @@
     Specs,
 )
 from pants.engine.internals.scheduler import SchedulerSession
+from pants.engine.query import QueryAddresses, QueryComponentWrapper, QueryParseInput
 from pants.engine.selectors import Params
 from pants.option.options import Options
 from pants.option.options_bootstrapper import OptionsBootstrapper
-from pants.scm.subsystems.changed import ChangedAddresses, ChangedOptions, ChangedRequest
+from pants.scm.subsystems.changed import (
+    ChangedAddresses,
+    ChangedOptions,
+    ChangedRequest,
+    UncachedScmWrapper,
+)
 from pants.util.ordered_set import OrderedSet
 
 logger = logging.getLogger(__name__)
@@ -74,6 +80,7 @@ def create(
         exclude_patterns: Optional[Iterable[str]] = None,
         tags: Optional[Iterable[str]] = None,
     ) -> Specs:
+        # Determine the literal specs.
         specs = cls.parse_specs(
             raw_specs=options.specs,
             build_root=build_root,
@@ -81,10 +88,65 @@ def create(
             tags=tags,
         )
 
+        # Determine `Changed` arguments directly from options to support pre-`Subsystem`
+        # initialization paths.
         changed_options = ChangedOptions.from_options(options.for_scope("changed"))
 
+        # Parse --query expressions into objects which can be resolved into addresses via v2
+        # rules.
+        query_expr_strings = options.for_global_scope().query
+        wrapped_exprs = session.product_request(
+            QueryComponentWrapper, [QueryParseInput(s) for s in query_expr_strings]
+        )
+        exprs = [wrapped.underlying for wrapped in wrapped_exprs]
+
         logger.debug("specs are: %s", specs)
         logger.debug("changed_options are: %s", changed_options)
+        logger.debug("query exprs are: %s", exprs)
+
+        def require_scm(entity: str):
+            """Return the SCM from `get_scm()`, or raise because `entity` requires one."""
+            scm = get_scm()
+            if not scm:
+                # TODO: centralize the error messaging for when an SCM is required, and describe
+                # what SCMs are supported!
+                raise InvalidSpecConstraint(
+                    f"{entity} are not available without a recognized SCM (usually Git)."
+                )
+            return scm
+
+        if exprs:
+            # TODO(#7346): deprecate --owner-of and --changed-* in favor of --query versions, and
+            # allow pipelining of successive query expressions with the command-line target specs
+            # as the initial input! Until then, --query has to be used on its own.
+            if specs.provided or changed_options.provided:
+                raise InvalidSpecConstraint(
+                    "`--query` cannot be used at the same time as target or file arguments or the "
+                    "`--changed-*` options. Please use only one."
+                )
+            if len(exprs) > 1:
+                raise ValueError(
+                    "Only one --query argument is currently supported! Received: {}.".format(exprs)
+                )
+
+            # TODO: this should only be necessary for the `changed-since`/etc queries! This can be
+            # done by returning a dummy ScmWrapper if no `changed-*` queries are used!
+            scm_wrapper = UncachedScmWrapper.create(require_scm("`--query` expressions"))
+            (expr_addresses,) = session.product_request(
+                QueryAddresses, [Params(scm_wrapper, exprs[0])]
+            )
+            logger.debug("expr addresses: %s", expr_addresses)
+            return Specs(
+                address_specs=AddressSpecs(
+                    dependencies=tuple(
+                        SingleAddress(a.spec_path, a.target_name)
+                        for a in expr_addresses.addresses
+                    ),
+                    exclude_patterns=exclude_patterns,
+                    tags=tags,
+                ),
+                filesystem_specs=FilesystemSpecs([]),
+            )
 
         if specs.provided and changed_options.provided:
             changed_name = "--changed-since" if changed_options.since else "--changed-diffspec"
@@ -102,12 +164,7 @@ def create(
         if not changed_options.provided:
             return specs
 
-        scm = get_scm()
-        if not scm:
-            raise InvalidSpecConstraint(
-                "The `--changed-*` options are not available without a recognized SCM (usually "
-                "Git)."
-            )
+        scm = require_scm("The `--changed-*` options")
         changed_request = ChangedRequest(
             sources=tuple(changed_options.changed_files(scm=scm)),
             dependees=changed_options.dependees,
diff --git a/src/python/pants/option/global_options.py b/src/python/pants/option/global_options.py
index 1a58842c382c..a86effd5c01a 100644
--- a/src/python/pants/option/global_options.py
+++ b/src/python/pants/option/global_options.py
@@ -414,6 +414,20 @@ def register_bootstrap_options(cls, register):
             help="Read additional specs from this file (e.g. target addresses or file names). "
             "Each spec should be one per line.",
         )
+
+        register(
+            "--query",
+            type=list,
+            default=[],
+            fromfile=True,
+            metavar="<query-expr>",
+            # TODO: rename this to restart_daemon=False! It's not clear where this option is
+            # even consumed.
+            daemon=False,
+            help="A query expression, e.g. `owner_of('path/to/file.py')`, which is resolved into "
+            "the targets to operate on. Only a single expression is currently supported.",
+        )
+
         register(
             "--verify-config",
             type=bool,
diff --git a/src/python/pants/scm/subsystems/changed.py b/src/python/pants/scm/subsystems/changed.py
index 4c75d46965cd..a8e14e25ae66 100644
--- a/src/python/pants/scm/subsystems/changed.py
+++ b/src/python/pants/scm/subsystems/changed.py
@@ -1,6 +1,7 @@
 # Copyright 2016 Pants project contributors (see CONTRIBUTORS.md).
 # Licensed under the Apache License, Version 2.0 (see LICENSE).
 
+import uuid
 from dataclasses import dataclass
 from enum import Enum
 from typing import List, Optional, Tuple, cast
@@ -160,10 +161,28 @@ def register_options(cls, register):
         )
 
 
+@dataclass(frozen=True)
+class UncachedScmWrapper:
+    """An `Scm` paired with a random salt, so that it can be provided to rules as a param.
+
+    The salt is intended to be different every time (see `create()`), so that anything computed
+    from the wrapped scm avoids being memoized by the engine.
+    """
+
+    scm: Scm
+    salt: str
+
+    @classmethod
+    def create(cls, scm: Scm) -> "UncachedScmWrapper":
+        """Wrap `scm` with a freshly generated `uuid.uuid4()` salt."""
+        return cls(scm=scm, salt=str(uuid.uuid4()))
+
+
 def rules():
     return [
         map_addresses_to_dependees,
         find_dependees,
         find_changed_owners,
         RootRule(ChangedRequest),
+        RootRule(UncachedScmWrapper),
     ]