-
-
Notifications
You must be signed in to change notification settings - Fork 636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a --query option for unifying target selection #7350
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). | ||
# Licensed under the Apache License, Version 2.0 (see LICENSE). | ||
|
||
import ast | ||
from abc import ABC, abstractmethod | ||
from dataclasses import dataclass | ||
from typing import Any, Dict, Tuple, Type, TypeVar | ||
|
||
from pants.build_graph.address import Address | ||
from pants.engine.collection import Collection | ||
from pants.engine.internals.graph import Owners, OwnersRequest | ||
from pants.engine.rules import RootRule, rule | ||
from pants.engine.selectors import Get | ||
from pants.engine.unions import UnionMembership, UnionRule, union | ||
from pants.scm.git import Git | ||
from pants.scm.subsystems.changed import ( | ||
ChangedFiles, | ||
ChangedFilesRequest, | ||
ChangedOptions, | ||
ChangedAddresses, | ||
ChangedRequest, | ||
DependeesOption, | ||
) | ||
from pants.util.meta import classproperty | ||
from pants.util.strutil import safe_shlex_split | ||
|
||
|
||
@union | ||
class QueryComponent(ABC): | ||
|
||
@classproperty | ||
@abstractmethod | ||
def function_name(cls): | ||
"""The initial argument of a shlexed query expression. | ||
|
||
If the user provides --query='<name> <args...>' on the command line, and `<name>` matches this | ||
property, the .parse_from_args() method is invoked with `<args...>` (shlexed, so split by | ||
spaces). | ||
""" | ||
|
||
@classmethod | ||
@abstractmethod | ||
def parse_from_args(cls, *args): | ||
"""Create an instance of this class from variadic positional string arguments. | ||
|
||
This method should raise an error if the args are incorrect or invalid. | ||
""" | ||
|
||
|
||
class QueryAddresses(Collection[Address]): | ||
pass | ||
|
||
|
||
@dataclass(frozen=True) | ||
class OwnerOf(QueryComponent): | ||
files: Tuple[str] | ||
|
||
function_name = 'owner_of' | ||
|
||
@classmethod | ||
def parse_from_args(cls, *args): | ||
return cls(files=tuple([str(f) for f in args])) | ||
|
||
|
||
@rule | ||
async def owner_of_request(owner_of: OwnerOf) -> QueryAddresses: | ||
request = OwnersRequest(sources=owner_of.files, do_not_generate_subtargets=True) | ||
owners = await Get(Owners, OwnersRequest, request) | ||
return QueryAddresses(owners) | ||
Comment on lines
+54
to
+69
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
|
||
@dataclass(frozen=True) | ||
class ChangesSince(QueryComponent): | ||
since: str | ||
dependees: DependeesOption | ||
|
||
function_name = 'since' | ||
|
||
@classmethod | ||
def parse_from_args(cls, since, dependees=DependeesOption.NONE): | ||
return cls(since=str(since), | ||
dependees=DependeesOption(dependees)) | ||
|
||
|
||
@rule | ||
async def since_request( | ||
git: Git, | ||
since: ChangesSince, | ||
) -> QueryAddresses: | ||
changed_options = ChangedOptions( | ||
since=since.since, | ||
diffspec=None, | ||
dependees=since.dependees, | ||
) | ||
changed_files = await Get(ChangedFiles, ChangedFilesRequest(changed_options, git)) | ||
changed = await Get(ChangedAddresses, ChangedRequest( | ||
sources=tuple(changed_files), | ||
dependees=changed_options.dependees, | ||
do_not_generate_subtargets=True, | ||
)) | ||
return QueryAddresses(changed) | ||
|
||
|
||
@dataclass(frozen=True) | ||
class ChangesForDiffspec(QueryComponent): | ||
diffspec: str | ||
dependees: DependeesOption | ||
|
||
function_name = 'changes_for_diffspec' | ||
|
||
@classmethod | ||
def parse_from_args(cls, diffspec, dependees=DependeesOption.NONE): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a little confused how the user would specify |
||
return cls(diffspec=str(diffspec), | ||
dependees=DependeesOption(dependees)) | ||
|
||
|
||
@rule | ||
async def changes_for_diffspec_request( | ||
git: Git, | ||
changes_for_diffspec: ChangesForDiffspec, | ||
) -> QueryAddresses: | ||
changed_options = ChangedOptions( | ||
since=None, | ||
diffspec=changes_for_diffspec.diffspec, | ||
dependees=changes_for_diffspec.dependees, | ||
) | ||
changed_files = await Get(ChangedFiles, ChangedFilesRequest(changed_options, git)) | ||
changed = await Get(ChangedAddresses, ChangedRequest( | ||
sources=tuple(changed_files), | ||
dependees=changed_options.dependees, | ||
do_not_generate_subtargets=True, | ||
)) | ||
return QueryAddresses(changed) | ||
|
||
|
||
_T = TypeVar('_T', bound=QueryComponent) | ||
|
||
|
||
@dataclass(frozen=True) | ||
class KnownQueryExpressions: | ||
components: Dict[str, Type[_T]] | ||
|
||
|
||
@rule | ||
def known_query_expressions(union_membership: UnionMembership) -> KnownQueryExpressions: | ||
return KnownQueryExpressions({ | ||
union_member.function_name: union_member | ||
for union_member in union_membership[QueryComponent] | ||
}) | ||
|
||
|
||
@dataclass(frozen=True) | ||
class QueryParseInput: | ||
expr: str | ||
|
||
|
||
class QueryParseError(Exception): pass | ||
|
||
|
||
@dataclass(frozen=True) | ||
class QueryComponentWrapper: | ||
underlying: _T | ||
|
||
|
||
@dataclass(frozen=True) | ||
class ParsedPythonesqueFunctionCall: | ||
"""Representation of a limited form of python named function calls.""" | ||
function_name: str | ||
positional_args: Tuple[Any, ...] | ||
keyword_args: Dict[str, Any] | ||
|
||
|
||
def _parse_python_arg(arg_value: ast.AST) -> Any: | ||
"""Convert an AST node for the argument of a function call into its literal value.""" | ||
return ast.literal_eval(arg_value) | ||
|
||
|
||
def _parse_python_esque_function_call(expr: str) -> ParsedPythonesqueFunctionCall: | ||
"""Parse a string into a description of a python function call expression.""" | ||
try: | ||
query_expression = ast.parse(expr).body[0].value | ||
except Exception as e: | ||
raise QueryParseError(f'Error parsing query expression: {e}') from e | ||
|
||
if not isinstance(query_expression, ast.Call): | ||
type_name = type(query_expression).__name__ | ||
raise QueryParseError( | ||
f'Query expression must be a single function call, but received {type_name}: ' | ||
f'{ast.dump(query_expression)}.') | ||
|
||
func_expr = query_expression.func | ||
if not isinstance(func_expr, ast.Name): | ||
raise QueryParseError('Function call in query expression should just be a name, but ' | ||
f'received {type(func_expr).__name__}: {ast.dump(func_expr)}.') | ||
function_name = func_expr.id | ||
|
||
positional_args = [_parse_python_arg(x) for x in query_expression.args] | ||
keyword_args = { | ||
k.arg: _parse_python_arg(k.value) | ||
for k in query_expression.keywords | ||
} | ||
|
||
return ParsedPythonesqueFunctionCall( | ||
function_name=function_name, | ||
positional_args=positional_args, | ||
keyword_args=keyword_args, | ||
) | ||
|
||
|
||
# TODO: allow returning an @union to avoid having to use this QueryComponentWrapper for type | ||
# erasure. | ||
@rule | ||
def parse_query_expr(s: QueryParseInput, known: KnownQueryExpressions) -> QueryComponentWrapper: | ||
"""Parse the input string and attempt to find a query function matching the function call. | ||
|
||
:return: A query component which can be resolved into `BuildFileAddresses` in the v2 engine. | ||
""" | ||
try: | ||
parsed_function_call = _parse_python_esque_function_call(s.expr) | ||
except Exception as e: | ||
raise QueryParseError(f'Error parsing expression {s}: {e}.') from e | ||
|
||
name = parsed_function_call.function_name | ||
args = parsed_function_call.positional_args | ||
kwargs = parsed_function_call.keyword_args | ||
|
||
selected_function = known.components.get(name, None) | ||
if selected_function: | ||
return QueryComponentWrapper(selected_function.parse_from_args(*args, **kwargs)) | ||
else: | ||
raise QueryParseError( | ||
f'Query function with name {name} not found (in expr {s})! ' | ||
f'The known functions are: {known}.') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Very nice. |
||
|
||
|
||
def rules(): | ||
return [ | ||
RootRule(OwnerOf), | ||
RootRule(ChangesSince), | ||
RootRule(QueryParseInput), | ||
RootRule(ChangesForDiffspec), | ||
known_query_expressions, | ||
UnionRule(QueryComponent, OwnerOf), | ||
UnionRule(QueryComponent, ChangesSince), | ||
UnionRule(QueryComponent, ChangesForDiffspec), | ||
owner_of_request, | ||
since_request, | ||
changes_for_diffspec_request, | ||
parse_query_expr, | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,10 +17,18 @@ | |
Specs, | ||
) | ||
from pants.engine.internals.scheduler import SchedulerSession | ||
from pants.engine.query import QueryAddresses, QueryComponentWrapper, QueryParseInput | ||
from pants.engine.selectors import Params | ||
from pants.option.options import Options | ||
from pants.option.options_bootstrapper import OptionsBootstrapper | ||
from pants.scm.subsystems.changed import ChangedAddresses, ChangedOptions, ChangedRequest | ||
from pants.scm.git import Git | ||
from pants.scm.subsystems.changed import ( | ||
ChangedAddresses, | ||
ChangedFiles, | ||
ChangedFilesRequest, | ||
ChangedOptions, | ||
ChangedRequest, | ||
) | ||
from pants.util.ordered_set import OrderedSet | ||
|
||
logger = logging.getLogger(__name__) | ||
|
@@ -74,17 +82,25 @@ def create( | |
exclude_patterns: Optional[Iterable[str]] = None, | ||
tags: Optional[Iterable[str]] = None, | ||
) -> Specs: | ||
# Determine the literal specs. | ||
specs = cls.parse_specs( | ||
raw_specs=options.specs, | ||
build_root=build_root, | ||
exclude_patterns=exclude_patterns, | ||
tags=tags, | ||
) | ||
|
||
# Determine `Changed` arguments directly from options to support pre-`Subsystem` | ||
# initialization paths. | ||
changed_options = ChangedOptions.from_options(options.for_scope("changed")) | ||
|
||
# Parse --query expressions into objects which can be resolved into BuildFileAddresses via | ||
# v2 rules. | ||
Comment on lines
+97
to
+98
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Stale - We now use |
||
query_expr_strings = options.for_global_scope().query | ||
|
||
logger.debug("specs are: %s", specs) | ||
logger.debug("changed_options are: %s", changed_options) | ||
logger.debug("query exprs are: %s", query_expr_strings) | ||
|
||
if specs.provided and changed_options.provided: | ||
changed_name = "--changed-since" if changed_options.since else "--changed-diffspec" | ||
|
@@ -99,17 +115,19 @@ def create( | |
"use only one." | ||
) | ||
|
||
if not changed_options.provided: | ||
if not (changed_options.provided or query_expr_strings): | ||
return specs | ||
|
||
scm = get_scm() | ||
if not scm: | ||
git = get_scm() | ||
if not git: | ||
raise InvalidSpecConstraint( | ||
"The `--changed-*` options are not available without a recognized SCM (usually " | ||
"Git)." | ||
"{} are not available without a recognized SCM (currently just git)." | ||
) | ||
assert isinstance(git, Git) | ||
(changed_files,) = session.product_request(ChangedFiles, [ | ||
Params(ChangedFilesRequest(changed_options, git=git))]) | ||
changed_request = ChangedRequest( | ||
sources=tuple(changed_options.changed_files(scm=scm)), | ||
sources=tuple(changed_files), | ||
dependees=changed_options.dependees, | ||
) | ||
(changed_addresses,) = session.product_request( | ||
|
@@ -125,6 +143,36 @@ def create( | |
filesystem_specs.append(FilesystemLiteralSpec(file_name)) | ||
else: | ||
address_specs.append(SingleAddress(address.spec_path, address.target_name)) | ||
|
||
|
||
if query_expr_strings: | ||
# TODO(#7346): deprecate --owner-of and --changed-* in favor of --query versions, allow | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not certain if we'll want to remove I'm not sure; maybe the consistency would be worth it. |
||
# pipelining of successive query expressions with the command-line target specs as the | ||
# initial input! | ||
if len(query_expr_strings) > 1: | ||
raise ValueError("Only one --query argument is currently supported! " | ||
f"Received: {query_expr_strings}.") | ||
|
||
# TODO: allow returning @union types to avoid this double synchronous engine invocation! | ||
exprs = session.product_request( | ||
QueryComponentWrapper, [QueryParseInput(s) for s in query_expr_strings] | ||
) | ||
exprs = [ex.underlying for ex in exprs] | ||
Comment on lines
+156
to
+160
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternatively, you might consider not using the engine for parsing? It looks like the parsing is a pure function, so it would likely be cheaper not to bounce in and out like this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1. Beyond being cheaper perf wise, it would likely be simpler code as pure functions are easier to work with and test than rules. |
||
|
||
(expr_addresses,) = session.product_request( | ||
QueryAddresses, [Params(git, exprs[0], options_bootstrapper)] | ||
) | ||
logger.debug("expr addresses: %s", expr_addresses) | ||
dependencies = tuple( | ||
SingleAddress(a.spec_path, a.target_name) for a in expr_addresses | ||
) | ||
return Specs( | ||
address_specs=AddressSpecs( | ||
dependencies=dependencies, exclude_patterns=exclude_patterns, tags=tags | ||
), | ||
filesystem_specs=FilesystemSpecs(filesystem_specs), | ||
) | ||
|
||
return Specs( | ||
address_specs=AddressSpecs( | ||
address_specs, exclude_patterns=exclude_patterns, tags=tags, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Totally fine to do this for now. I've found generated subtargets unusually confusing to reason about, and I'm proposing in #10423 that we seriously consider removing the (mis)feature.
If we do stick with generated subtargets, do you have any thoughts on how they should integrate with
--query
? Generally, we've said that we want to use generated subtargets when there's enough precision to indicate that the user wants it, such as file arguments or--changed-since
. The tricky part with--query
from what I can tell is that you will sometimes have files via--changed-since
, but sometimes have normal addresses. So possibly we should always use normal addresses?