From 6e7e1b7e740c1fe0a5b34a7cf611049088172f19 Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Sat, 25 Apr 2020 10:28:55 -0700 Subject: [PATCH 01/11] Add generic mechanism to codegen sources in V2 # Delete this line to force CI to run Clippy and the Rust tests. [ci skip-rust-tests] # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 231 ++++++++++++++++--------- src/python/pants/engine/target_test.py | 108 +++++++++++- 2 files changed, 260 insertions(+), 79 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 2fe26e12029..0ba7ab37411 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -38,7 +38,7 @@ from pants.engine.legacy.structs import BundleAdaptor from pants.engine.rules import RootRule, rule from pants.engine.selectors import Get -from pants.engine.unions import UnionMembership +from pants.engine.unions import UnionMembership, union from pants.source.wrapped_globs import EagerFilesetWithSpec, FilesetRelPathWrapper, Filespec from pants.util.collections import ensure_list, ensure_str_list from pants.util.frozendict import FrozenDict @@ -1162,83 +1162,10 @@ def compute_value( # ----------------------------------------------------------------------------------------------- -# Common Fields used across most targets +# Sources and codegen # ----------------------------------------------------------------------------------------------- -class Tags(StringSequenceField): - """Arbitrary strings that you can use to describe a target. - - For example, you may tag some test targets with 'integration_test' so that you could run - `./pants --tags='integration_test' test ::` to only run on targets with that tag. - """ - - alias = "tags" - - -class DescriptionField(StringField): - """A human-readable description of the target. - - Use `./pants list --documented ::` to see all targets with descriptions. - """ - - alias = "description" - - -# TODO(#9388): remove? We don't want this in V2, but maybe keep it for V1. -class NoCacheField(BoolField): - """If True, don't store results for this target in the V1 cache.""" - - alias = "no_cache" - default = False - v1_only = True - - -# TODO(#9388): remove? -class ScopeField(StringField): - """A V1-only field for the scope of the target, which is used by the JVM to determine the - target's inclusion in the class path. - - See `pants.build_graph.target_scopes.Scopes`. - """ - - alias = "scope" - v1_only = True - - -# TODO(#9388): Remove. -class IntransitiveField(BoolField): - alias = "_transitive" - default = False - v1_only = True - - -COMMON_TARGET_FIELDS = (Tags, DescriptionField, NoCacheField, ScopeField, IntransitiveField) - - -# NB: To hydrate the dependencies into Targets, use -# `await Get[Targets](Addresses(tgt[Dependencies].value)`. -class Dependencies(PrimitiveField): - """Addresses to other targets that this target depends on, e.g. `['src/python/project:lib']`.""" - - alias = "dependencies" - value: Optional[Tuple[Address, ...]] - default = None - - # NB: The type hint for `raw_value` is a lie. While we do expect end-users to use - # Iterable[str], the Struct and Addressable code will have already converted those strings - # into a List[Address]. But, that's an implementation detail and we don't want our - # documentation, which is auto-generated from these type hints, to leak that. - @classmethod - def compute_value( - cls, raw_value: Optional[Iterable[str]], *, address: Address - ) -> Optional[Tuple[Address, ...]]: - value_or_default = super().compute_value(raw_value, address=address) - if value_or_default is None: - return None - return tuple(sorted(value_or_default)) - - class Sources(AsyncField): """A list of files and globs that belong to this target. @@ -1346,6 +1273,7 @@ def filespec(self) -> Filespec: @dataclass(frozen=True) class HydrateSourcesRequest: field: Sources + codegen_language: Optional[Type[Sources]] = None @dataclass(frozen=True) @@ -1357,9 +1285,55 @@ def eager_fileset_with_spec(self, *, address: Address) -> EagerFilesetWithSpec: return EagerFilesetWithSpec(address.spec_path, self.filespec, self.snapshot) +class CodegenSources(Sources): + pass + + +@union +@dataclass(frozen=True) +class GenerateSourcesRequest: + """A request to go from protocol sources -> a particular language. + + This should be subclassed for each distinct codegen implementation. The subclasses must define + the class properties `input` and `output`. The subclass must also be registered via + `UnionRule(GenerateSourcesRequest, GenerateFortranFromAvroRequest)`, for example. + + The rule to actually implement the codegen should take the subclass as input, and it must + return `GeneratedSources`. + + For example: + + class GenerateFortranFromAvroRequest: + input = AvroSources + output = FortranSources + + @rule + def generate_fortran_from_avro(request: GenerateFortranFromAvroRequest) -> GeneratedSources: + ... + + def rules(): + return [ + generate_fortran_from_avro, + UnionRule(GenerateSourcesRequest, GenerateFortranFromAvroRequest), + ] + """ + + protocol_sources: Snapshot + + input: ClassVar[Type[CodegenSources]] + output: ClassVar[Type[Sources]] + + +@dataclass(frozen=True) +class GeneratedSources: + snapshot: Snapshot + + @rule async def hydrate_sources( - request: HydrateSourcesRequest, glob_match_error_behavior: GlobMatchErrorBehavior + request: HydrateSourcesRequest, + glob_match_error_behavior: GlobMatchErrorBehavior, + union_membership: UnionMembership, ) -> HydratedSources: sources_field = request.field globs = sources_field.sanitized_raw_value @@ -1387,7 +1361,109 @@ async def hydrate_sources( ) ) sources_field.validate_snapshot(snapshot) - return HydratedSources(snapshot, sources_field.filespec) + + should_generate_sources = request.codegen_language is not None and isinstance( + sources_field, CodegenSources + ) + if not should_generate_sources: + return HydratedSources(snapshot, sources_field.filespec) + + generate_request_types: Iterable[Type[GenerateSourcesRequest]] = union_membership.union_rules[ + GenerateSourcesRequest + ] + relevant_generate_request_types = [ + generate_request_type + for generate_request_type in generate_request_types + if isinstance(sources_field, generate_request_type.input) + and generate_request_type.output == request.codegen_language + ] + if not relevant_generate_request_types: + return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec) + if len(relevant_generate_request_types) > 1: + raise ValueError() + generate_request_type = relevant_generate_request_types[0] + generated_sources = await Get[GeneratedSources]( + GenerateSourcesRequest, generate_request_type(snapshot) + ) + return HydratedSources(generated_sources.snapshot, sources_field.filespec) + + +# ----------------------------------------------------------------------------------------------- +# Other common Fields used across most targets +# ----------------------------------------------------------------------------------------------- + + +class Tags(StringSequenceField): + """Arbitrary strings that you can use to describe a target. + + For example, you may tag some test targets with 'integration_test' so that you could run + `./pants --tags='integration_test' test ::` to only run on targets with that tag. + """ + + alias = "tags" + + +class DescriptionField(StringField): + """A human-readable description of the target. + + Use `./pants list --documented ::` to see all targets with descriptions. + """ + + alias = "description" + + +# TODO(#9388): remove? We don't want this in V2, but maybe keep it for V1. +class NoCacheField(BoolField): + """If True, don't store results for this target in the V1 cache.""" + + alias = "no_cache" + default = False + v1_only = True + + +# TODO(#9388): remove? +class ScopeField(StringField): + """A V1-only field for the scope of the target, which is used by the JVM to determine the + target's inclusion in the class path. + + See `pants.build_graph.target_scopes.Scopes`. + """ + + alias = "scope" + v1_only = True + + +# TODO(#9388): Remove. +class IntransitiveField(BoolField): + alias = "_transitive" + default = False + v1_only = True + + +COMMON_TARGET_FIELDS = (Tags, DescriptionField, NoCacheField, ScopeField, IntransitiveField) + + +# NB: To hydrate the dependencies into Targets, use +# `await Get[Targets](Addresses(tgt[Dependencies].value)`. +class Dependencies(PrimitiveField): + """Addresses to other targets that this target depends on, e.g. `['src/python/project:lib']`.""" + + alias = "dependencies" + value: Optional[Tuple[Address, ...]] + default = None + + # NB: The type hint for `raw_value` is a lie. While we do expect end-users to use + # Iterable[str], the Struct and Addressable code will have already converted those strings + # into a List[Address]. But, that's an implementation detail and we don't want our + # documentation, which is auto-generated from these type hints, to leak that. + @classmethod + def compute_value( + cls, raw_value: Optional[Iterable[str]], *, address: Address + ) -> Optional[Tuple[Address, ...]]: + value_or_default = super().compute_value(raw_value, address=address) + if value_or_default is None: + return None + return tuple(sorted(value_or_default)) # TODO: figure out what support looks like for this with the Target API. The expected value is an @@ -1444,5 +1520,6 @@ def rules(): find_valid_configurations, hydrate_sources, RootRule(TargetsToValidConfigurationsRequest), + RootRule(GenerateSourcesRequest), RootRule(HydrateSourcesRequest), ] diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index c2607a798dc..9b9d641da04 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -11,7 +11,13 @@ from pants.base.specs import FilesystemLiteralSpec from pants.engine.addresses import Address -from pants.engine.fs import EMPTY_DIRECTORY_DIGEST, PathGlobs, Snapshot +from pants.engine.fs import ( + EMPTY_DIRECTORY_DIGEST, + FileContent, + InputFilesContent, + PathGlobs, + Snapshot, +) from pants.engine.internals.scheduler import ExecutionError from pants.engine.rules import RootRule, rule from pants.engine.selectors import Get, Params @@ -19,10 +25,13 @@ AmbiguousImplementationsException, AsyncField, BoolField, + CodegenSources, Configuration, ConfigurationWithOrigin, DictStringToStringField, DictStringToStringSequenceField, + GeneratedSources, + GenerateSourcesRequest, HydratedSources, HydrateSourcesRequest, InvalidFieldChoiceException, @@ -751,7 +760,7 @@ def assert_invalid_type(raw_value: Any) -> None: # ----------------------------------------------------------------------------------------------- -# Test common fields +# Test Sources # ----------------------------------------------------------------------------------------------- @@ -866,3 +875,98 @@ def hydrate(sources_cls: Type[Sources], sources: Iterable[str]) -> HydratedSourc "f2.txt", "f3.txt", ) + + +# ----------------------------------------------------------------------------------------------- +# Test Codegen +# ----------------------------------------------------------------------------------------------- + + +class AvroSources(CodegenSources): + pass + + +class AvroLibrary(Target): + alias = "avro_library" + core_fields = (AvroSources,) + + +class GenerateFortranFromAvroRequest(GenerateSourcesRequest): + input = AvroSources + output = FortranSources + + +@rule +async def generate_fortran_from_avro(request: GenerateFortranFromAvroRequest) -> GeneratedSources: + protocol_files = request.protocol_sources.files + + def generate_fortran(fp: str) -> FileContent: + parent = str(PurePath(fp).parent).replace("src/avro", "src/fortran") + file_name = f"{PurePath(fp).stem}.f95" + return FileContent(str(PurePath(parent, file_name)), b"Generated") + + result = await Get[Snapshot](InputFilesContent([generate_fortran(fp) for fp in protocol_files])) + return GeneratedSources(result) + + +class TestCodegen(TestBase): + @classmethod + def rules(cls): + return ( + *super().rules(), + *target_rules(), + generate_fortran_from_avro, + RootRule(GenerateFortranFromAvroRequest), + RootRule(HydrateSourcesRequest), + UnionRule(GenerateSourcesRequest, GenerateFortranFromAvroRequest), + ) + + def test_generate_sources(self) -> None: + addr = Address.parse("src/avro:lib") + self.create_files("src/avro", files=["f.avro"]) + protocol_sources = AvroSources(["*.avro"], address=addr) + + # First, get the original protocol sources + hydrated_protocol_sources = self.request_single_product( + HydratedSources, HydrateSourcesRequest(protocol_sources) + ) + assert hydrated_protocol_sources.snapshot.files == ("src/avro/f.avro",) + + # Test directly feeding the protocol sources into the codegen rule + generated_sources = self.request_single_product( + GeneratedSources, GenerateFortranFromAvroRequest(hydrated_protocol_sources.snapshot) + ) + assert generated_sources.snapshot.files == ("src/fortran/f.f95",) + + # Test that HydrateSourcesRequest can also be used + generated_via_hydrate_sources = self.request_single_product( + HydratedSources, + HydrateSourcesRequest(protocol_sources, codegen_language=FortranSources), + ) + assert generated_via_hydrate_sources.snapshot.files == ("src/fortran/f.f95",) + + def test_works_with_subclass_fields(self) -> None: + class CustomAvroSources(AvroSources): + pass + + addr = Address.parse("src/avro:lib") + self.create_files("src/avro", files=["f.avro"]) + protocol_sources = CustomAvroSources(["*.avro"], address=addr) + generated = self.request_single_product( + HydratedSources, + HydrateSourcesRequest(protocol_sources, codegen_language=FortranSources), + ) + assert generated.snapshot.files == ("src/fortran/f.f95",) + + def test_cannot_generate_language(self) -> None: + class SmalltalkSources(Sources): + pass + + addr = Address.parse("src/avro:lib") + self.create_files("src/avro", files=["f.avro"]) + protocol_sources = AvroSources(["*.avro"], address=addr) + generated = self.request_single_product( + HydratedSources, + HydrateSourcesRequest(protocol_sources, codegen_language=SmalltalkSources), + ) + assert generated.snapshot.files == () From 7877e89ab7eb2aaf6696dbf1265ba2736af78084 Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Sat, 25 Apr 2020 11:05:04 -0700 Subject: [PATCH 02/11] Add proper exception message for ambiguous codegen implementations # Delete this line to force CI to run Clippy and the Rust tests. [ci skip-rust-tests] # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 22 +++++++++++++++++++--- src/python/pants/engine/target_test.py | 6 +++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 0ba7ab37411..b758d216809 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -915,12 +915,28 @@ def __init__( bulleted_list_sep = "\n * " super().__init__( f"Multiple of the registered implementations for {goal_description} work for " - f"{target.address} (target type {repr(target.alias)}).\n\n" - "It is ambiguous which implementation to use. Possible implementations:" + f"{target.address} (target type {repr(target.alias)}). It is ambiguous which " + "implementation to use.\n\nPossible implementations:" f"{bulleted_list_sep}{bulleted_list_sep.join(possible_config_types)}" ) +class AmbiguousCodegenImplementationsException(Exception): + """Exception for when there are multiple codegen implementations for the same path.""" + + def __init__(self, generators: Iterable[Type["GenerateSourcesRequest"]],) -> None: + example_generator = list(generators)[0] + input = example_generator.input.__name__ + output = example_generator.output.__name__ + possible_generators = sorted(generator.__name__ for generator in generators) + bulleted_list_sep = "\n * " + super().__init__( + f"Multiple of the registered code generators can generate {output} given {input}. It " + "is ambiguous which implementation to use.\n\nPossible implementations:" + f"{bulleted_list_sep}{bulleted_list_sep.join(possible_generators)}" + ) + + # ----------------------------------------------------------------------------------------------- # Field templates # ----------------------------------------------------------------------------------------------- @@ -1380,7 +1396,7 @@ async def hydrate_sources( if not relevant_generate_request_types: return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec) if len(relevant_generate_request_types) > 1: - raise ValueError() + raise AmbiguousCodegenImplementationsException(relevant_generate_request_types) generate_request_type = relevant_generate_request_types[0] generated_sources = await Get[GeneratedSources]( GenerateSourcesRequest, generate_request_type(snapshot) diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index 9b9d641da04..30b80853df6 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -926,19 +926,19 @@ def test_generate_sources(self) -> None: self.create_files("src/avro", files=["f.avro"]) protocol_sources = AvroSources(["*.avro"], address=addr) - # First, get the original protocol sources + # First, get the original protocol sources. hydrated_protocol_sources = self.request_single_product( HydratedSources, HydrateSourcesRequest(protocol_sources) ) assert hydrated_protocol_sources.snapshot.files == ("src/avro/f.avro",) - # Test directly feeding the protocol sources into the codegen rule + # Test directly feeding the protocol sources into the codegen rule. generated_sources = self.request_single_product( GeneratedSources, GenerateFortranFromAvroRequest(hydrated_protocol_sources.snapshot) ) assert generated_sources.snapshot.files == ("src/fortran/f.f95",) - # Test that HydrateSourcesRequest can also be used + # Test that HydrateSourcesRequest can also be used. generated_via_hydrate_sources = self.request_single_product( HydratedSources, HydrateSourcesRequest(protocol_sources, codegen_language=FortranSources), From 5340f74cf9ab3c0136faf7b1ea9d69f74b9aea4d Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Mon, 27 Apr 2020 14:47:48 -0700 Subject: [PATCH 03/11] Simplify excluding _python_requirements_library() from built PEXes [ci skip-jvm-tests] [ci skip-rust-tests] --- .../backend/python/rules/importable_python_sources.py | 11 +++-------- src/python/pants/backend/python/target_types.py | 4 +--- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/python/pants/backend/python/rules/importable_python_sources.py b/src/python/pants/backend/python/rules/importable_python_sources.py index 1fbcf2782e4..cf221e7623b 100644 --- a/src/python/pants/backend/python/rules/importable_python_sources.py +++ b/src/python/pants/backend/python/rules/importable_python_sources.py @@ -5,7 +5,7 @@ from pants.backend.python.rules.inject_init import InitInjectedSnapshot, InjectInitRequest from pants.backend.python.rules.inject_init import rules as inject_init_rules -from pants.backend.python.target_types import PythonRequirementsFileSources, PythonSources +from pants.backend.python.target_types import PythonSources from pants.core.target_types import FilesSources, ResourcesSources from pants.core.util_rules import determine_source_files from pants.core.util_rules.determine_source_files import AllSourceFilesRequest, SourceFiles @@ -34,13 +34,8 @@ class ImportablePythonSources: @rule async def prepare_python_sources(targets: Targets) -> ImportablePythonSources: def is_relevant(tgt: Target) -> bool: - # NB: PythonRequirementsFileSources is a subclass of FilesSources. We filter it out so that - # requirements.txt is not included. If the user intended for the file to be included, they - # should use a normal `files()` target rather than `python_requirements()`. - return ( - tgt.has_field(PythonSources) - or tgt.has_field(ResourcesSources) - or (tgt.has_field(FilesSources) and not tgt.has_field(PythonRequirementsFileSources)) + return any( + tgt.has_field(field) for field in (PythonSources, ResourcesSources, FilesSources) ) stripped_sources = await Get[SourceFiles]( diff --git a/src/python/pants/backend/python/target_types.py b/src/python/pants/backend/python/target_types.py index ac3c147f9dd..a8f6074b16c 100644 --- a/src/python/pants/backend/python/target_types.py +++ b/src/python/pants/backend/python/target_types.py @@ -414,9 +414,7 @@ class PythonRequirementLibrary(Target): # ----------------------------------------------------------------------------------------------- -# NB: This subclasses FilesSources to ensure that we still properly handle stripping source roots, -# but we still new type so that we can distinguish between normal FilesSources vs. this field. -class PythonRequirementsFileSources(FilesSources): +class PythonRequirementsFileSources(Sources): pass From ff97ac43639dc99823d5445e6286d91cb629128e Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Mon, 27 Apr 2020 16:28:14 -0700 Subject: [PATCH 04/11] Allow `HydratedSourcesRequest` to indicate which Sources types are valid # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] [ci skip-rust-tests] --- .../python/rules/importable_python_sources.py | 11 ++--- .../backend/python/rules/run_setup_py.py | 10 ++++- .../pants/backend/python/target_types.py | 1 - .../core/util_rules/determine_source_files.py | 41 ++++++++++++++---- .../core/util_rules/strip_source_roots.py | 24 +++++++++-- src/python/pants/engine/target.py | 43 +++++++++++++++++-- src/python/pants/engine/target_test.py | 25 ++++++++++- 7 files changed, 129 insertions(+), 26 deletions(-) diff --git a/src/python/pants/backend/python/rules/importable_python_sources.py b/src/python/pants/backend/python/rules/importable_python_sources.py index cf221e7623b..335136738e6 100644 --- a/src/python/pants/backend/python/rules/importable_python_sources.py +++ b/src/python/pants/backend/python/rules/importable_python_sources.py @@ -12,7 +12,7 @@ from pants.engine.fs import Snapshot from pants.engine.rules import RootRule, rule from pants.engine.selectors import Get -from pants.engine.target import Sources, Target, Targets +from pants.engine.target import Sources, Targets @dataclass(frozen=True) @@ -33,14 +33,11 @@ class ImportablePythonSources: @rule async def prepare_python_sources(targets: Targets) -> ImportablePythonSources: - def is_relevant(tgt: Target) -> bool: - return any( - tgt.has_field(field) for field in (PythonSources, ResourcesSources, FilesSources) - ) - stripped_sources = await Get[SourceFiles]( AllSourceFilesRequest( - (tgt.get(Sources) for tgt in targets if is_relevant(tgt)), strip_source_roots=True + (tgt.get(Sources) for tgt in targets), + valid_sources_types=(PythonSources, ResourcesSources, FilesSources), + strip_source_roots=True, ) ) init_injected = await Get[InitInjectedSnapshot](InjectInitRequest(stripped_sources.snapshot)) diff --git a/src/python/pants/backend/python/rules/run_setup_py.py b/src/python/pants/backend/python/rules/run_setup_py.py index d810152b31a..08e4d9e4687 100644 --- a/src/python/pants/backend/python/rules/run_setup_py.py +++ b/src/python/pants/backend/python/rules/run_setup_py.py @@ -472,7 +472,11 @@ async def get_sources( ) -> SetupPySources: targets = request.targets stripped_srcs_list = await MultiGet( - Get[SourceRootStrippedSources](StripSourcesFieldRequest(target.get(Sources))) + Get[SourceRootStrippedSources]( + StripSourcesFieldRequest( + target.get(Sources), valid_sources_types=(PythonSources, ResourcesSources) + ) + ) for target in targets ) @@ -518,7 +522,9 @@ async def get_ancestor_init_py( """ source_roots = source_root_config.get_source_roots() sources = await Get[SourceFiles]( - AllSourceFilesRequest(tgt[PythonSources] for tgt in targets if tgt.has_field(PythonSources)) + AllSourceFilesRequest( + (tgt.get(Sources) for tgt in targets), valid_sources_types=(PythonSources,) + ) ) # Find the ancestors of all dirs containing .py files, including those dirs themselves. source_dir_ancestors: Set[Tuple[str, str]] = set() # Items are (src_root, path incl. src_root). diff --git a/src/python/pants/backend/python/target_types.py b/src/python/pants/backend/python/target_types.py index a8f6074b16c..52dbe1f650a 100644 --- a/src/python/pants/backend/python/target_types.py +++ b/src/python/pants/backend/python/target_types.py @@ -6,7 +6,6 @@ from pants.backend.python.python_artifact import PythonArtifact from pants.backend.python.subsystems.pytest import PyTest -from pants.core.target_types import FilesSources from pants.core.util_rules.determine_source_files import SourceFiles from pants.engine.addresses import Address from pants.engine.fs import Snapshot diff --git a/src/python/pants/core/util_rules/determine_source_files.py b/src/python/pants/core/util_rules/determine_source_files.py index a78985c3b1b..7a7fb3e367e 100644 --- a/src/python/pants/core/util_rules/determine_source_files.py +++ b/src/python/pants/core/util_rules/determine_source_files.py @@ -2,7 +2,7 @@ # Licensed under the Apache License, Version 2.0 (see LICENSE). from dataclasses import dataclass -from typing import Iterable, Tuple, Union +from typing import Iterable, Tuple, Type, Union from pants.base.specs import AddressSpec, OriginSpec from pants.core.util_rules import strip_source_roots @@ -35,12 +35,18 @@ def files(self) -> Tuple[str, ...]: @dataclass(unsafe_hash=True) class AllSourceFilesRequest: sources_fields: Tuple[SourcesField, ...] - strip_source_roots: bool = False + valid_sources_types: Tuple[Type[SourcesField], ...] + strip_source_roots: bool def __init__( - self, sources_fields: Iterable[SourcesField], *, strip_source_roots: bool = False + self, + sources_fields: Iterable[SourcesField], + *, + valid_sources_types: Iterable[Type[SourcesField]] = (SourcesField,), + strip_source_roots: bool = False ) -> None: self.sources_fields = tuple(sources_fields) + self.valid_sources_types = tuple(valid_sources_types) self.strip_source_roots = strip_source_roots @@ -48,15 +54,18 @@ def __init__( @dataclass(unsafe_hash=True) class SpecifiedSourceFilesRequest: sources_fields_with_origins: Tuple[Tuple[SourcesField, OriginSpec], ...] - strip_source_roots: bool = False + valid_sources_types: Tuple[Type[SourcesField], ...] + strip_source_roots: bool def __init__( self, sources_fields_with_origins: Iterable[Tuple[SourcesField, OriginSpec]], *, + valid_sources_types: Iterable[Type[SourcesField]] = (SourcesField,), strip_source_roots: bool = False ) -> None: self.sources_fields_with_origins = tuple(sources_fields_with_origins) + self.valid_sources_types = tuple(valid_sources_types) self.strip_source_roots = strip_source_roots @@ -81,7 +90,11 @@ async def determine_all_source_files(request: AllSourceFilesRequest) -> SourceFi """Merge all `Sources` fields into one Snapshot.""" if request.strip_source_roots: stripped_snapshots = await MultiGet( - Get[SourceRootStrippedSources](StripSourcesFieldRequest(sources_field)) + Get[SourceRootStrippedSources]( + StripSourcesFieldRequest( + sources_field, valid_sources_types=request.valid_sources_types + ) + ) for sources_field in request.sources_fields ) digests_to_merge = tuple( @@ -89,7 +102,11 @@ async def determine_all_source_files(request: AllSourceFilesRequest) -> SourceFi ) else: all_hydrated_sources = await MultiGet( - Get[HydratedSources](HydrateSourcesRequest(sources_field)) + Get[HydratedSources]( + HydrateSourcesRequest( + sources_field, valid_sources_types=request.valid_sources_types + ) + ) for sources_field in request.sources_fields ) digests_to_merge = tuple( @@ -104,7 +121,11 @@ async def determine_specified_source_files(request: SpecifiedSourceFilesRequest) """Determine the specified `sources` for targets, possibly finding a subset of the original `sources` fields if the user supplied file arguments.""" all_hydrated_sources = await MultiGet( - Get[HydratedSources](HydrateSourcesRequest(sources_field_with_origin[0])) + Get[HydratedSources]( + HydrateSourcesRequest( + sources_field_with_origin[0], valid_sources_types=request.valid_sources_types + ) + ) for sources_field_with_origin in request.sources_fields_with_origins ) @@ -133,7 +154,11 @@ async def determine_specified_source_files(request: SpecifiedSourceFilesRequest) all_sources_fields = (*full_snapshots.keys(), *snapshot_subset_requests.keys()) stripped_snapshots = await MultiGet( Get[SourceRootStrippedSources]( - StripSourcesFieldRequest(sources_field, specified_files_snapshot=snapshot) + StripSourcesFieldRequest( + sources_field, + specified_files_snapshot=snapshot, + valid_sources_types=request.valid_sources_types, + ) ) for sources_field, snapshot in zip(all_sources_fields, all_snapshots) ) diff --git a/src/python/pants/core/util_rules/strip_source_roots.py b/src/python/pants/core/util_rules/strip_source_roots.py index 2661079d7b4..32225ed76cd 100644 --- a/src/python/pants/core/util_rules/strip_source_roots.py +++ b/src/python/pants/core/util_rules/strip_source_roots.py @@ -4,7 +4,7 @@ import itertools from dataclasses import dataclass from pathlib import PurePath -from typing import Optional, cast +from typing import Iterable, Optional, Tuple, Type, cast from pants.core.target_types import FilesSources from pants.engine.addresses import Address @@ -23,6 +23,7 @@ from pants.engine.target import Sources as SourcesField from pants.engine.target import rules as target_rules from pants.source.source_root import NoSourceRootError, SourceRootConfig +from pants.util.meta import frozen_after_init @dataclass(frozen=True) @@ -46,7 +47,8 @@ class StripSnapshotRequest: representative_path: Optional[str] = None -@dataclass(frozen=True) +@frozen_after_init +@dataclass(unsafe_hash=True) class StripSourcesFieldRequest: """A request to strip source roots for every file in a `Sources` field. @@ -56,8 +58,20 @@ class StripSourcesFieldRequest: """ sources_field: SourcesField + valid_sources_types: Tuple[Type[SourcesField], ...] = (SourcesField,) specified_files_snapshot: Optional[Snapshot] = None + def __init__( + self, + sources_field: SourcesField, + *, + valid_sources_types: Iterable[Type[SourcesField]] = (SourcesField,), + specified_files_snapshot: Optional[Snapshot] = None, + ) -> None: + self.sources_field = sources_field + self.valid_sources_types = tuple(valid_sources_types) + self.specified_files_snapshot = specified_files_snapshot + @rule async def strip_source_roots_from_snapshot( @@ -129,7 +143,11 @@ async def strip_source_roots_from_sources_field( if request.specified_files_snapshot is not None: sources_snapshot = request.specified_files_snapshot else: - hydrated_sources = await Get[HydratedSources](HydrateSourcesRequest(request.sources_field)) + hydrated_sources = await Get[HydratedSources]( + HydrateSourcesRequest( + request.sources_field, valid_sources_types=request.valid_sources_types + ) + ) sources_snapshot = hydrated_sources.snapshot if not sources_snapshot.files: diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 2fe26e12029..ca94b33f34b 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -1343,15 +1343,39 @@ def filespec(self) -> Filespec: ) -@dataclass(frozen=True) +@frozen_after_init +@dataclass(unsafe_hash=True) class HydrateSourcesRequest: field: Sources + valid_sources_types: Tuple[Type[Sources], ...] + + def __init__( + self, field: Sources, *, valid_sources_types: Iterable[Type[Sources]] = (Sources,) + ) -> None: + """Convert raw sources globs into an instance of HydratedSources. + + If you only want to convert certain Sources fields, such as only PythonSources, set + `valid_sources_types`. Any invalid sources will return an empty `HydratedSources` instance, + indicated by the attribute `output_type = None`. + """ + self.field = field + self.valid_sources_types = tuple(valid_sources_types) @dataclass(frozen=True) class HydratedSources: + """The result of hydrating a SourcesField. + + The `output_type` will indicate which of the `HydrateSourcesRequest.valid_sources_type` the + result corresponds to, e.g. if the result comes from `FilesSources` vs. `PythonSources`. If this + value is None, then the input `Sources` field was not one of the expected types. This property + allows for switching on the result, e.g. handling hydrated files() sources differently than + hydrated Python sources. + """ + snapshot: Snapshot filespec: Filespec + output_type: Optional[Type[Sources]] def eager_fileset_with_spec(self, *, address: Address) -> EagerFilesetWithSpec: return EagerFilesetWithSpec(address.spec_path, self.filespec, self.snapshot) @@ -1362,10 +1386,21 @@ async def hydrate_sources( request: HydrateSourcesRequest, glob_match_error_behavior: GlobMatchErrorBehavior ) -> HydratedSources: sources_field = request.field - globs = sources_field.sanitized_raw_value + output_type = next( + ( + valid_type + for valid_type in request.valid_sources_types + if isinstance(sources_field, valid_type) + ), + None, + ) + if output_type is None: + return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec, output_type=None) + + globs = sources_field.sanitized_raw_value if globs is None: - return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec) + return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec, output_type=output_type) conjunction = ( GlobExpansionConjunction.all_match @@ -1387,7 +1422,7 @@ async def hydrate_sources( ) ) sources_field.validate_snapshot(snapshot) - return HydratedSources(snapshot, sources_field.filespec) + return HydratedSources(snapshot, sources_field.filespec, output_type=output_type) # TODO: figure out what support looks like for this with the Target API. The expected value is an diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index c2607a798dc..46839088a40 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -751,7 +751,7 @@ def assert_invalid_type(raw_value: Any) -> None: # ----------------------------------------------------------------------------------------------- -# Test common fields +# Test Sources # ----------------------------------------------------------------------------------------------- @@ -791,6 +791,29 @@ def test_normal_hydration(self) -> None: "exclude": [{"globs": ["src/fortran/**/ignore*", "src/fortran/ignored.f03"]}], } + def test_output_type(self) -> None: + class SourcesSubclass(Sources): + pass + + addr = Address.parse(":lib") + self.create_files("", files=["f1.f95"]) + + valid_sources = SourcesSubclass(["*"], address=addr) + hydrated_valid_sources = self.request_single_product( + HydratedSources, + HydrateSourcesRequest(valid_sources, valid_sources_types=[SourcesSubclass]), + ) + assert hydrated_valid_sources.snapshot.files == ("f1.f95",) + assert hydrated_valid_sources.output_type == SourcesSubclass + + invalid_sources = Sources(["*"], address=addr) + hydrated_invalid_sources = self.request_single_product( + HydratedSources, + HydrateSourcesRequest(invalid_sources, valid_sources_types=[SourcesSubclass]), + ) + assert hydrated_invalid_sources.snapshot.files == () + assert hydrated_invalid_sources.output_type is None + def test_unmatched_globs(self) -> None: self.create_files("", files=["f1.f95"]) sources = Sources(["non_existent.f95"], address=Address.parse(":lib")) From 9fe244a6bf0f6e69d9f6f8a86b668cf89550b73b Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Mon, 27 Apr 2020 17:11:28 -0700 Subject: [PATCH 05/11] Move around some of `target.py` to better organize this new code # Delete this line to force CI to run Clippy and the Rust tests. [ci skip-rust-tests] # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 153 +++++++++++++++--------------- 1 file changed, 79 insertions(+), 74 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index ca94b33f34b..4ab7f9b22b9 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -1162,83 +1162,10 @@ def compute_value( # ----------------------------------------------------------------------------------------------- -# Common Fields used across most targets +# Sources # ----------------------------------------------------------------------------------------------- -class Tags(StringSequenceField): - """Arbitrary strings that you can use to describe a target. - - For example, you may tag some test targets with 'integration_test' so that you could run - `./pants --tags='integration_test' test ::` to only run on targets with that tag. - """ - - alias = "tags" - - -class DescriptionField(StringField): - """A human-readable description of the target. - - Use `./pants list --documented ::` to see all targets with descriptions. - """ - - alias = "description" - - -# TODO(#9388): remove? We don't want this in V2, but maybe keep it for V1. -class NoCacheField(BoolField): - """If True, don't store results for this target in the V1 cache.""" - - alias = "no_cache" - default = False - v1_only = True - - -# TODO(#9388): remove? -class ScopeField(StringField): - """A V1-only field for the scope of the target, which is used by the JVM to determine the - target's inclusion in the class path. - - See `pants.build_graph.target_scopes.Scopes`. - """ - - alias = "scope" - v1_only = True - - -# TODO(#9388): Remove. -class IntransitiveField(BoolField): - alias = "_transitive" - default = False - v1_only = True - - -COMMON_TARGET_FIELDS = (Tags, DescriptionField, NoCacheField, ScopeField, IntransitiveField) - - -# NB: To hydrate the dependencies into Targets, use -# `await Get[Targets](Addresses(tgt[Dependencies].value)`. -class Dependencies(PrimitiveField): - """Addresses to other targets that this target depends on, e.g. `['src/python/project:lib']`.""" - - alias = "dependencies" - value: Optional[Tuple[Address, ...]] - default = None - - # NB: The type hint for `raw_value` is a lie. While we do expect end-users to use - # Iterable[str], the Struct and Addressable code will have already converted those strings - # into a List[Address]. But, that's an implementation detail and we don't want our - # documentation, which is auto-generated from these type hints, to leak that. - @classmethod - def compute_value( - cls, raw_value: Optional[Iterable[str]], *, address: Address - ) -> Optional[Tuple[Address, ...]]: - value_or_default = super().compute_value(raw_value, address=address) - if value_or_default is None: - return None - return tuple(sorted(value_or_default)) - - class Sources(AsyncField): """A list of files and globs that belong to this target. @@ -1425,6 +1352,84 @@ async def hydrate_sources( return HydratedSources(snapshot, sources_field.filespec, output_type=output_type) +# ----------------------------------------------------------------------------------------------- +# Other common Fields used across most targets +# ----------------------------------------------------------------------------------------------- + + +class Tags(StringSequenceField): + """Arbitrary strings that you can use to describe a target. + + For example, you may tag some test targets with 'integration_test' so that you could run + `./pants --tags='integration_test' test ::` to only run on targets with that tag. + """ + + alias = "tags" + + +class DescriptionField(StringField): + """A human-readable description of the target. + + Use `./pants list --documented ::` to see all targets with descriptions. + """ + + alias = "description" + + +# TODO(#9388): remove? We don't want this in V2, but maybe keep it for V1. +class NoCacheField(BoolField): + """If True, don't store results for this target in the V1 cache.""" + + alias = "no_cache" + default = False + v1_only = True + + +# TODO(#9388): remove? +class ScopeField(StringField): + """A V1-only field for the scope of the target, which is used by the JVM to determine the + target's inclusion in the class path. + + See `pants.build_graph.target_scopes.Scopes`. + """ + + alias = "scope" + v1_only = True + + +# TODO(#9388): Remove. +class IntransitiveField(BoolField): + alias = "_transitive" + default = False + v1_only = True + + +COMMON_TARGET_FIELDS = (Tags, DescriptionField, NoCacheField, ScopeField, IntransitiveField) + + +# NB: To hydrate the dependencies into Targets, use +# `await Get[Targets](Addresses(tgt[Dependencies].value)`. +class Dependencies(PrimitiveField): + """Addresses to other targets that this target depends on, e.g. `['src/python/project:lib']`.""" + + alias = "dependencies" + value: Optional[Tuple[Address, ...]] + default = None + + # NB: The type hint for `raw_value` is a lie. While we do expect end-users to use + # Iterable[str], the Struct and Addressable code will have already converted those strings + # into a List[Address]. But, that's an implementation detail and we don't want our + # documentation, which is auto-generated from these type hints, to leak that. + @classmethod + def compute_value( + cls, raw_value: Optional[Iterable[str]], *, address: Address + ) -> Optional[Tuple[Address, ...]]: + value_or_default = super().compute_value(raw_value, address=address) + if value_or_default is None: + return None + return tuple(sorted(value_or_default)) + + # TODO: figure out what support looks like for this with the Target API. The expected value is an # Artifact, but V1 has no common Artifact interface. class ProvidesField(PrimitiveField): From bdbb7743e91f05be8721d9e21a1da3308a419d96 Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Mon, 27 Apr 2020 20:42:55 -0700 Subject: [PATCH 06/11] Pass the entire Target to GenerateSourcesRequest Stu and Benjy pointed out that it's common for a target to have fields on it that influence how the compiler runs. So, it will be common for the codegen rules to access the original target. While we could expect rules to call `await Get[WrappedTarget](Address)` directly in the rules, this makes life simpler for them. # Delete this line to force CI to run Clippy and the Rust tests. [ci skip-rust-tests] # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 6 ++++-- src/python/pants/engine/target_test.py | 26 ++++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index b758d216809..44b0db1d962 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -306,6 +306,7 @@ def __init__( def field_types(self) -> Tuple[Type[Field], ...]: return (*self.core_fields, *self.plugin_fields) + @union @final class PluginField: """A sentinel class to allow plugin authors to add additional fields to this target type. @@ -1335,6 +1336,7 @@ def rules(): """ protocol_sources: Snapshot + protocol_target: Target input: ClassVar[Type[CodegenSources]] output: ClassVar[Type[Sources]] @@ -1398,8 +1400,9 @@ async def hydrate_sources( if len(relevant_generate_request_types) > 1: raise AmbiguousCodegenImplementationsException(relevant_generate_request_types) generate_request_type = relevant_generate_request_types[0] + wrapped_protocol_target = await Get[WrappedTarget](Address, sources_field.address) generated_sources = await Get[GeneratedSources]( - GenerateSourcesRequest, generate_request_type(snapshot) + GenerateSourcesRequest, generate_request_type(snapshot, wrapped_protocol_target.target) ) return HydratedSources(generated_sources.snapshot, sources_field.filespec) @@ -1536,6 +1539,5 @@ def rules(): find_valid_configurations, hydrate_sources, RootRule(TargetsToValidConfigurationsRequest), - RootRule(GenerateSourcesRequest), RootRule(HydrateSourcesRequest), ] diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index 30b80853df6..42f97457c94 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -52,6 +52,7 @@ TargetsWithOrigins, TargetWithOrigin, TooManyTargetsException, + WrappedTarget, ) from pants.engine.target import rules as target_rules from pants.engine.unions import UnionMembership, UnionRule, union @@ -921,10 +922,17 @@ def rules(cls): UnionRule(GenerateSourcesRequest, GenerateFortranFromAvroRequest), ) - def test_generate_sources(self) -> None: - addr = Address.parse("src/avro:lib") + @classmethod + def target_types(cls): + return [AvroLibrary] + + def setUp(self) -> None: + self.address = Address.parse("src/avro:lib") self.create_files("src/avro", files=["f.avro"]) - protocol_sources = AvroSources(["*.avro"], address=addr) + self.add_to_build_file("src/avro", "avro_library(name='lib', sources=['*.avro'])") + + def test_generate_sources(self) -> None: + protocol_sources = AvroSources(["*.avro"], address=self.address) # First, get the original protocol sources. hydrated_protocol_sources = self.request_single_product( @@ -933,8 +941,10 @@ def test_generate_sources(self) -> None: assert hydrated_protocol_sources.snapshot.files == ("src/avro/f.avro",) # Test directly feeding the protocol sources into the codegen rule. + wrapped_tgt = self.request_single_product(WrappedTarget, self.address) generated_sources = self.request_single_product( - GeneratedSources, GenerateFortranFromAvroRequest(hydrated_protocol_sources.snapshot) + GeneratedSources, + GenerateFortranFromAvroRequest(hydrated_protocol_sources.snapshot, wrapped_tgt.target), ) assert generated_sources.snapshot.files == ("src/fortran/f.f95",) @@ -949,9 +959,7 @@ def test_works_with_subclass_fields(self) -> None: class CustomAvroSources(AvroSources): pass - addr = Address.parse("src/avro:lib") - self.create_files("src/avro", files=["f.avro"]) - protocol_sources = CustomAvroSources(["*.avro"], address=addr) + protocol_sources = CustomAvroSources(["*.avro"], address=self.address) generated = self.request_single_product( HydratedSources, HydrateSourcesRequest(protocol_sources, codegen_language=FortranSources), @@ -962,9 +970,7 @@ def test_cannot_generate_language(self) -> None: class SmalltalkSources(Sources): pass - addr = Address.parse("src/avro:lib") - self.create_files("src/avro", files=["f.avro"]) - protocol_sources = AvroSources(["*.avro"], address=addr) + protocol_sources = AvroSources(["*.avro"], address=self.address) generated = self.request_single_product( HydratedSources, HydrateSourcesRequest(protocol_sources, codegen_language=SmalltalkSources), From 3596a9cd48480b7e23d98a4d84dbf7ba741cdb92 Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Tue, 28 Apr 2020 08:50:13 -0700 Subject: [PATCH 07/11] Add `Sources.can_generate()` to allow pre-filtering before hydration We need this for run_setup_py.py # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 21 +++++++++++++++++++-- src/python/pants/engine/target_test.py | 4 ++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 380539544ea..72f66046298 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -923,9 +923,11 @@ def __init__( class AmbiguousCodegenImplementationsException(Exception): - """Exception for when there are multiple codegen implementations for the same path.""" + """Exception for when there are multiple codegen implementations and it is ambiguous which to + use.""" - def __init__(self, generators: Iterable[Type["GenerateSourcesRequest"]],) -> None: + # TODO: handle a) all the same generators and b) different generators + def __init__(self, generators: Iterable[Type["GenerateSourcesRequest"]]) -> None: example_generator = list(generators)[0] input = example_generator.input.__name__ output = example_generator.output.__name__ @@ -1286,6 +1288,19 @@ def filespec(self) -> Filespec: args=includes, exclude=[excludes], root=self.address.spec_path ) + @final + @classmethod + def can_generate(cls, output_type: Type["Sources"], union_membership: UnionMembership) -> bool: + """Can this Sources field be used to generate the output_type?""" + generate_request_types: Iterable[ + Type[GenerateSourcesRequest] + ] = union_membership.union_rules.get(GenerateSourcesRequest, ()) + return any( + issubclass(cls, generate_request_type.input) + and issubclass(generate_request_type.output, output_type) + for generate_request_type in generate_request_types + ) + @frozen_after_init @dataclass(unsafe_hash=True) @@ -1398,6 +1413,8 @@ async def hydrate_sources( # First, find if there are any code generators for the input `sources_field`. This will be used # to determine if the sources_field is valid or not. + # We could alternatively use `sources_field.can_generate()`, but we want to error if there are + # 2+ generators due to ambiguity. generate_request_types: Iterable[ Type[GenerateSourcesRequest] ] = union_membership.union_rules.get(GenerateSourcesRequest, ()) diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index 92652088ba8..35daf6f780a 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -952,9 +952,11 @@ def setUp(self) -> None: self.address = Address.parse("src/avro:lib") self.create_files("src/avro", files=["f.avro"]) self.add_to_build_file("src/avro", "avro_library(name='lib', sources=['*.avro'])") + self.union_membership = self.request_single_product(UnionMembership, Params()) def test_generate_sources(self) -> None: protocol_sources = AvroSources(["*.avro"], address=self.address) + assert protocol_sources.can_generate(FortranSources, self.union_membership) is True # First, get the original protocol sources. hydrated_protocol_sources = self.request_single_product( @@ -985,6 +987,7 @@ class CustomAvroSources(AvroSources): pass protocol_sources = CustomAvroSources(["*.avro"], address=self.address) + assert protocol_sources.can_generate(FortranSources, self.union_membership) is True generated = self.request_single_product( HydratedSources, HydrateSourcesRequest( @@ -998,6 +1001,7 @@ class SmalltalkSources(Sources): pass protocol_sources = AvroSources(["*.avro"], address=self.address) + assert protocol_sources.can_generate(SmalltalkSources, self.union_membership) is False generated = self.request_single_product( HydratedSources, HydrateSourcesRequest( From b29c30c0485832d70d863f1e06083af2d03c020c Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Tue, 28 Apr 2020 12:06:33 -0700 Subject: [PATCH 08/11] Update call sites to use codegen # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- .../python/rules/importable_python_sources.py | 1 + .../backend/python/rules/run_setup_py.py | 36 ++++++++++++++----- .../core/util_rules/determine_source_files.py | 23 ++++++++++-- .../core/util_rules/strip_source_roots.py | 11 ++++-- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/python/pants/backend/python/rules/importable_python_sources.py b/src/python/pants/backend/python/rules/importable_python_sources.py index bed7665bf6c..e65f86b2511 100644 --- a/src/python/pants/backend/python/rules/importable_python_sources.py +++ b/src/python/pants/backend/python/rules/importable_python_sources.py @@ -37,6 +37,7 @@ async def prepare_python_sources(targets: Targets) -> ImportablePythonSources: AllSourceFilesRequest( (tgt.get(Sources) for tgt in targets), for_sources_types=(PythonSources, ResourcesSources, FilesSources), + enable_codegen=True, strip_source_roots=True, ) ) diff --git a/src/python/pants/backend/python/rules/run_setup_py.py b/src/python/pants/backend/python/rules/run_setup_py.py index f9d16446993..74db8a9cf9b 100644 --- a/src/python/pants/backend/python/rules/run_setup_py.py +++ b/src/python/pants/backend/python/rules/run_setup_py.py @@ -68,6 +68,7 @@ TargetsWithOrigins, TransitiveTargets, ) +from pants.engine.unions import UnionMembership from pants.option.custom_types import shell_str from pants.python.python_setup import PythonSetup from pants.source.source_root import SourceRootConfig @@ -279,6 +280,7 @@ async def run_setup_pys( python_setup: PythonSetup, distdir: DistDir, workspace: Workspace, + union_membership: UnionMembership, ) -> SetupPy: """Run setup.py commands on all exported targets addressed.""" args = tuple(options.values.args) @@ -310,7 +312,7 @@ async def run_setup_pys( owners = await MultiGet( Get[ExportedTarget](OwnedDependency(tgt)) for tgt in transitive_targets.closure - if is_ownable_target(tgt) + if is_ownable_target(tgt, union_membership) ) exported_targets = list(FrozenOrderedSet(owners)) @@ -474,7 +476,9 @@ async def get_sources( stripped_srcs_list = await MultiGet( Get[SourceRootStrippedSources]( StripSourcesFieldRequest( - target.get(Sources), for_sources_types=(PythonSources, ResourcesSources) + target.get(Sources), + for_sources_types=(PythonSources, ResourcesSources), + enable_codegen=True, ) ) for target in targets @@ -523,7 +527,9 @@ async def get_ancestor_init_py( source_roots = source_root_config.get_source_roots() sources = await Get[SourceFiles]( AllSourceFilesRequest( - (tgt.get(Sources) for tgt in targets), for_sources_types=(PythonSources,) + (tgt.get(Sources) for tgt in targets), + for_sources_types=(PythonSources,), + enable_codegen=True, ) ) # Find the ancestors of all dirs containing .py files, including those dirs themselves. @@ -564,12 +570,16 @@ def _is_exported(target: Target) -> bool: @named_rule(desc="Compute distribution's 3rd party requirements") -async def get_requirements(dep_owner: DependencyOwner) -> ExportedTargetRequirements: +async def get_requirements( + dep_owner: DependencyOwner, union_membership: UnionMembership +) -> ExportedTargetRequirements: transitive_targets = await Get[TransitiveTargets]( Addresses([dep_owner.exported_target.target.address]) ) - ownable_tgts = [tgt for tgt in transitive_targets.closure if is_ownable_target(tgt)] + ownable_tgts = [ + tgt for tgt in transitive_targets.closure if is_ownable_target(tgt, union_membership) + ] owners = await MultiGet(Get[ExportedTarget](OwnedDependency(tgt)) for tgt in ownable_tgts) owned_by_us: Set[Target] = set() owned_by_others: Set[Target] = set() @@ -611,7 +621,9 @@ async def get_requirements(dep_owner: DependencyOwner) -> ExportedTargetRequirem @named_rule(desc="Find all code to be published in the distribution") -async def get_owned_dependencies(dependency_owner: DependencyOwner) -> OwnedDependencies: +async def get_owned_dependencies( + dependency_owner: DependencyOwner, union_membership: UnionMembership +) -> OwnedDependencies: """Find the dependencies of dependency_owner that are owned by it. Includes dependency_owner itself. @@ -619,7 +631,9 @@ async def get_owned_dependencies(dependency_owner: DependencyOwner) -> OwnedDepe transitive_targets = await Get[TransitiveTargets]( Addresses([dependency_owner.exported_target.target.address]) ) - ownable_targets = [tgt for tgt in transitive_targets.closure if is_ownable_target(tgt)] + ownable_targets = [ + tgt for tgt in transitive_targets.closure if is_ownable_target(tgt, union_membership) + ] owners = await MultiGet(Get[ExportedTarget](OwnedDependency(tgt)) for tgt in ownable_targets) owned_dependencies = [ tgt @@ -692,8 +706,12 @@ async def setup_setuptools(setuptools: Setuptools) -> SetuptoolsSetup: return SetuptoolsSetup(requirements_pex=requirements_pex,) -def is_ownable_target(tgt: Target) -> bool: - return tgt.has_field(PythonSources) or tgt.has_field(ResourcesSources) +def is_ownable_target(tgt: Target, union_membership: UnionMembership) -> bool: + return ( + tgt.has_field(PythonSources) + or tgt.has_field(ResourcesSources) + or tgt.get(Sources).can_generate(PythonSources, union_membership) + ) def rules(): diff --git a/src/python/pants/core/util_rules/determine_source_files.py b/src/python/pants/core/util_rules/determine_source_files.py index 403d1bcef7a..eae3f7ebcbf 100644 --- a/src/python/pants/core/util_rules/determine_source_files.py +++ b/src/python/pants/core/util_rules/determine_source_files.py @@ -36,6 +36,7 @@ def files(self) -> Tuple[str, ...]: class AllSourceFilesRequest: sources_fields: Tuple[SourcesField, ...] for_sources_types: Tuple[Type[SourcesField], ...] + enable_codegen: bool strip_source_roots: bool def __init__( @@ -43,10 +44,12 @@ def __init__( sources_fields: Iterable[SourcesField], *, for_sources_types: Iterable[Type[SourcesField]] = (SourcesField,), + enable_codegen: bool = False, strip_source_roots: bool = False ) -> None: self.sources_fields = tuple(sources_fields) self.for_sources_types = tuple(for_sources_types) + self.enable_codegen = enable_codegen self.strip_source_roots = strip_source_roots @@ -55,6 +58,7 @@ def __init__( class SpecifiedSourceFilesRequest: sources_fields_with_origins: Tuple[Tuple[SourcesField, OriginSpec], ...] for_sources_types: Tuple[Type[SourcesField], ...] + enable_codegen: bool strip_source_roots: bool def __init__( @@ -62,10 +66,12 @@ def __init__( sources_fields_with_origins: Iterable[Tuple[SourcesField, OriginSpec]], *, for_sources_types: Iterable[Type[SourcesField]] = (SourcesField,), + enable_codegen: bool = False, strip_source_roots: bool = False ) -> None: self.sources_fields_with_origins = tuple(sources_fields_with_origins) self.for_sources_types = tuple(for_sources_types) + self.enable_codegen = enable_codegen self.strip_source_roots = strip_source_roots @@ -91,7 +97,11 @@ async def determine_all_source_files(request: AllSourceFilesRequest) -> SourceFi if request.strip_source_roots: stripped_snapshots = await MultiGet( Get[SourceRootStrippedSources]( - StripSourcesFieldRequest(sources_field, for_sources_types=request.for_sources_types) + StripSourcesFieldRequest( + sources_field, + for_sources_types=request.for_sources_types, + enable_codegen=request.enable_codegen, + ) ) for sources_field in request.sources_fields ) @@ -101,7 +111,11 @@ async def determine_all_source_files(request: AllSourceFilesRequest) -> SourceFi else: all_hydrated_sources = await MultiGet( Get[HydratedSources]( - HydrateSourcesRequest(sources_field, for_sources_types=request.for_sources_types) + HydrateSourcesRequest( + sources_field, + for_sources_types=request.for_sources_types, + enable_codegen=request.enable_codegen, + ) ) for sources_field in request.sources_fields ) @@ -119,7 +133,9 @@ async def determine_specified_source_files(request: SpecifiedSourceFilesRequest) all_hydrated_sources = await MultiGet( Get[HydratedSources]( HydrateSourcesRequest( - sources_field_with_origin[0], for_sources_types=request.for_sources_types + sources_field_with_origin[0], + for_sources_types=request.for_sources_types, + enable_codegen=request.enable_codegen, ) ) for sources_field_with_origin in request.sources_fields_with_origins @@ -154,6 +170,7 @@ async def determine_specified_source_files(request: SpecifiedSourceFilesRequest) sources_field, specified_files_snapshot=snapshot, for_sources_types=request.for_sources_types, + enable_codegen=request.enable_codegen, ) ) for sources_field, snapshot in zip(all_sources_fields, all_snapshots) diff --git a/src/python/pants/core/util_rules/strip_source_roots.py b/src/python/pants/core/util_rules/strip_source_roots.py index a490efd6c20..24758cf631c 100644 --- a/src/python/pants/core/util_rules/strip_source_roots.py +++ b/src/python/pants/core/util_rules/strip_source_roots.py @@ -58,18 +58,21 @@ class StripSourcesFieldRequest: """ sources_field: SourcesField - for_sources_types: Tuple[Type[SourcesField], ...] = (SourcesField,) - specified_files_snapshot: Optional[Snapshot] = None + for_sources_types: Tuple[Type[SourcesField], ...] + enable_codegen: bool + specified_files_snapshot: Optional[Snapshot] def __init__( self, sources_field: SourcesField, *, for_sources_types: Iterable[Type[SourcesField]] = (SourcesField,), + enable_codegen: bool = False, specified_files_snapshot: Optional[Snapshot] = None, ) -> None: self.sources_field = sources_field self.for_sources_types = tuple(for_sources_types) + self.enable_codegen = enable_codegen self.specified_files_snapshot = specified_files_snapshot @@ -145,7 +148,9 @@ async def strip_source_roots_from_sources_field( else: hydrated_sources = await Get[HydratedSources]( HydrateSourcesRequest( - request.sources_field, for_sources_types=request.for_sources_types + request.sources_field, + for_sources_types=request.for_sources_types, + enable_codegen=request.enable_codegen, ) ) sources_snapshot = hydrated_sources.snapshot From 4138f3826af716210d964d7be275a9551b3683b6 Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Tue, 28 Apr 2020 13:44:33 -0700 Subject: [PATCH 09/11] Improve error message for ambiguous codegen implementations # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 50 ++++++++++++++++++++------ src/python/pants/engine/target_test.py | 42 ++++++++++++++++++++++ 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 8926341c2ad..0a1a3671f04 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -926,18 +926,44 @@ class AmbiguousCodegenImplementationsException(Exception): """Exception for when there are multiple codegen implementations and it is ambiguous which to use.""" - # TODO: handle a) all the same generators and b) different generators - def __init__(self, generators: Iterable[Type["GenerateSourcesRequest"]]) -> None: - example_generator = list(generators)[0] - input = example_generator.input.__name__ - output = example_generator.output.__name__ - possible_generators = sorted(generator.__name__ for generator in generators) + def __init__( + self, + generators: Iterable[Type["GenerateSourcesRequest"]], + *, + for_sources_types: Iterable[Type["Sources"]], + ) -> None: bulleted_list_sep = "\n * " - super().__init__( - f"Multiple of the registered code generators can generate {output} given {input}. It " - "is ambiguous which implementation to use.\n\nPossible implementations:" - f"{bulleted_list_sep}{bulleted_list_sep.join(possible_generators)}" + all_same_generator_paths = ( + len(set((generator.input, generator.output) for generator in generators)) == 1 ) + example_generator = list(generators)[0] + input = example_generator.input.__name__ + if all_same_generator_paths: + output = example_generator.output.__name__ + possible_generators = sorted(generator.__name__ for generator in generators) + super().__init__( + f"Multiple of the registered code generators can generate {output} from {input}. " + "It is ambiguous which implementation to use.\n\nPossible implementations:" + f"{bulleted_list_sep}{bulleted_list_sep.join(possible_generators)}" + ) + else: + possible_output_types = sorted( + generator.output.__name__ + for generator in generators + if issubclass(generator.output, tuple(for_sources_types)) + ) + possible_generators_with_output = [ + f"{generator.__name__} -> {generator.output.__name__}" + for generator in sorted(generators, key=lambda generator: generator.output.__name__) + ] + super().__init__( + f"Multiple of the registered code generators can generate one of " + f"{possible_output_types} from {input}. It is ambiguous which implementation to " + f"use. This can happen when the call site requests too many different output types " + f"from the same original protocol sources.\n\nPossible implementations with their " + f"output type: {bulleted_list_sep}" + f"{bulleted_list_sep.join(possible_generators_with_output)}" + ) # ----------------------------------------------------------------------------------------------- @@ -1425,7 +1451,9 @@ async def hydrate_sources( and issubclass(generate_request_type.output, request.for_sources_types) ] if request.enable_codegen and len(relevant_generate_request_types) > 1: - raise AmbiguousCodegenImplementationsException(relevant_generate_request_types) + raise AmbiguousCodegenImplementationsException( + relevant_generate_request_types, for_sources_types=request.for_sources_types + ) generate_request_type = next(iter(relevant_generate_request_types), None) # Now, determine if any of the `for_sources_types` may be used, either because the diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index 3b6c0545cb3..4f72c530e74 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -22,6 +22,7 @@ from pants.engine.rules import RootRule, rule from pants.engine.selectors import Get, Params from pants.engine.target import ( + AmbiguousCodegenImplementationsException, AmbiguousImplementationsException, AsyncField, BoolField, @@ -1010,3 +1011,44 @@ class SmalltalkSources(Sources): ) assert generated.snapshot.files == () assert generated.output_type is None + + def test_ambiguous_implementations_exception(self) -> None: + # This error message is quite complex. We test that it correctly generates the message. + class FortranGenerator1(GenerateSourcesRequest): + input = AvroSources + output = FortranSources + + class FortranGenerator2(GenerateSourcesRequest): + input = AvroSources + output = FortranSources + + class SmalltalkSources(Sources): + pass + + class SmalltalkGenerator(GenerateSourcesRequest): + input = AvroSources + output = SmalltalkSources + + class IrrelevantSources(Sources): + pass + + # Test when all generators have the same input and output. + exc = AmbiguousCodegenImplementationsException( + [FortranGenerator1, FortranGenerator2], for_sources_types=[FortranSources] + ) + assert "can generate FortranSources from AvroSources" in str(exc) + assert "* FortranGenerator1" in str(exc) + assert "* FortranGenerator2" in str(exc) + + # Test when the generators have different input and output, which usually happens because + # the call site used too expansive of a `for_sources_types` argument. + exc = AmbiguousCodegenImplementationsException( + [FortranGenerator1, SmalltalkGenerator], + for_sources_types=[FortranSources, SmalltalkSources, IrrelevantSources], + ) + assert "can generate one of ['FortranSources', 'SmalltalkSources'] from AvroSources" in str( + exc + ) + assert "IrrelevantSources" not in str(exc) + assert "* FortranGenerator1 -> FortranSources" in str(exc) + assert "* SmalltalkGenerator -> SmalltalkSources" in str(exc) From e05d8b68611523607c4f2fb80475953f762aeed3 Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Tue, 28 Apr 2020 13:49:29 -0700 Subject: [PATCH 10/11] Rename HydratedSources.output_type to sources_type Less of a misnomer. This is not the actual output. It's rather a description of the sources. This also fits in better with `HydrateSourcesRequest.for_sources_types`. # Delete this line to force CI to run Clippy and the Rust tests. [ci skip-rust-tests] # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 28 +++++++++++++------------- src/python/pants/engine/target_test.py | 8 ++++---- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 0a1a3671f04..1e3004d3a9a 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -1346,7 +1346,7 @@ def __init__( If you only want to handle certain Sources fields, such as only PythonSources, set `for_sources_types`. Any invalid sources will return a `HydratedSources` instance with an - empty snapshot and `output_type = None`. + empty snapshot and `sources_type = None`. If `enable_codegen` is set to `True`, any codegen sources will try to be converted to one of the `for_sources_types`. @@ -1372,17 +1372,17 @@ def __post_init__(self) -> None: class HydratedSources: """The result of hydrating a SourcesField. - The `output_type` will indicate which of the `HydrateSourcesRequest.for_sources_type` the result - corresponds to, e.g. if the result comes from `FilesSources` vs. `PythonSources`. If this value - is None, then the input `Sources` field was not one of the expected types; or, when codegen was - enabled in the request, there was no valid code generator to generate the requested language - from the original input. This property allows for switching on the result, e.g. handling - hydrated files() sources differently than hydrated Python sources. + The `sources_type` will indicate which of the `HydrateSourcesRequest.for_sources_type` the + result corresponds to, e.g. if the result comes from `FilesSources` vs. `PythonSources`. If this + value is None, then the input `Sources` field was not one of the expected types; or, when + codegen was enabled in the request, there was no valid code generator to generate the requested + language from the original input. This property allows for switching on the result, e.g. + handling hydrated files() sources differently than hydrated Python sources. """ snapshot: Snapshot filespec: Filespec - output_type: Optional[Type[Sources]] + sources_type: Optional[Type[Sources]] def eager_fileset_with_spec(self, *, address: Address) -> EagerFilesetWithSpec: return EagerFilesetWithSpec(address.spec_path, self.filespec, self.snapshot) @@ -1467,7 +1467,7 @@ def compatible_with_sources_field(valid_type: Type[Sources]) -> bool: ) return is_instance or can_be_generated - output_type = next( + sources_type = next( ( valid_type for valid_type in request.for_sources_types @@ -1475,14 +1475,14 @@ def compatible_with_sources_field(valid_type: Type[Sources]) -> bool: ), None, ) - if output_type is None: - return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec, output_type=None) + if sources_type is None: + return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec, sources_type=None) # Now, hydrate the `globs`. Even if we are going to use codegen, we will need the original # protocol sources to be hydrated. globs = sources_field.sanitized_raw_value if globs is None: - return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec, output_type=output_type) + return HydratedSources(EMPTY_SNAPSHOT, sources_field.filespec, sources_type=sources_type) conjunction = ( GlobExpansionConjunction.all_match @@ -1507,13 +1507,13 @@ def compatible_with_sources_field(valid_type: Type[Sources]) -> bool: # Finally, return if codegen is not in use; otherwise, run the relevant code generator. if not request.enable_codegen or generate_request_type is None: - return HydratedSources(snapshot, sources_field.filespec, output_type=output_type) + return HydratedSources(snapshot, sources_field.filespec, sources_type=sources_type) wrapped_protocol_target = await Get[WrappedTarget](Address, sources_field.address) generated_sources = await Get[GeneratedSources]( GenerateSourcesRequest, generate_request_type(snapshot, wrapped_protocol_target.target) ) return HydratedSources( - generated_sources.snapshot, sources_field.filespec, output_type=output_type + generated_sources.snapshot, sources_field.filespec, sources_type=sources_type ) diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py index 4f72c530e74..b95494fe2d2 100644 --- a/src/python/pants/engine/target_test.py +++ b/src/python/pants/engine/target_test.py @@ -814,7 +814,7 @@ class SourcesSubclass(Sources): HydrateSourcesRequest(valid_sources, for_sources_types=[SourcesSubclass]), ) assert hydrated_valid_sources.snapshot.files == ("f1.f95",) - assert hydrated_valid_sources.output_type == SourcesSubclass + assert hydrated_valid_sources.sources_type == SourcesSubclass invalid_sources = Sources(["*"], address=addr) hydrated_invalid_sources = self.request_single_product( @@ -822,7 +822,7 @@ class SourcesSubclass(Sources): HydrateSourcesRequest(invalid_sources, for_sources_types=[SourcesSubclass]), ) assert hydrated_invalid_sources.snapshot.files == () - assert hydrated_invalid_sources.output_type is None + assert hydrated_invalid_sources.sources_type is None def test_unmatched_globs(self) -> None: self.create_files("", files=["f1.f95"]) @@ -981,7 +981,7 @@ def test_generate_sources(self) -> None: ), ) assert generated_via_hydrate_sources.snapshot.files == ("src/fortran/f.f95",) - assert generated_via_hydrate_sources.output_type == FortranSources + assert generated_via_hydrate_sources.sources_type == FortranSources def test_works_with_subclass_fields(self) -> None: class CustomAvroSources(AvroSources): @@ -1010,7 +1010,7 @@ class SmalltalkSources(Sources): ), ) assert generated.snapshot.files == () - assert generated.output_type is None + assert generated.sources_type is None def test_ambiguous_implementations_exception(self) -> None: # This error message is quite complex. We test that it correctly generates the message. From 7aabe194309555863b2c6c1e5dfeec4f9aff23be Mon Sep 17 00:00:00 2001 From: Eric Arellano Date: Tue, 28 Apr 2020 14:51:16 -0700 Subject: [PATCH 11/11] Expand docstring for Sources.can_generate() # Delete this line to force CI to run Clippy and the Rust tests. [ci skip-rust-tests] # Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/python/pants/engine/target.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/python/pants/engine/target.py b/src/python/pants/engine/target.py index 1e3004d3a9a..2fa645ccad6 100644 --- a/src/python/pants/engine/target.py +++ b/src/python/pants/engine/target.py @@ -1317,7 +1317,23 @@ def filespec(self) -> Filespec: @final @classmethod def can_generate(cls, output_type: Type["Sources"], union_membership: UnionMembership) -> bool: - """Can this Sources field be used to generate the output_type?""" + """Can this Sources field be used to generate the output_type? + + Generally, this method does not need to be used. Most call sites can simply use the below, + and the engine will generate the sources if possible or will return an instance of + HydratedSources with an empty snapshot if not possible: + + await Get[HydratedSources]( + HydrateSourcesRequest( + sources_field, + for_sources_types=[FortranSources], + enable_codegen=True, + ) + ) + + This method is useful when you need to filter targets before hydrating them, such as how + you may filter targets via `tgt.has_field(MyField)`. + """ generate_request_types: Iterable[ Type[GenerateSourcesRequest] ] = union_membership.union_rules.get(GenerateSourcesRequest, ())