Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[internal] Python module mapping considers resolves #14034

Merged
merged 1 commit into from
Jan 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import enum
import itertools
import logging
from collections import defaultdict
from dataclasses import dataclass
Expand All @@ -16,7 +17,9 @@
DEFAULT_MODULE_MAPPING,
DEFAULT_TYPE_STUB_MODULE_MAPPING,
)
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.target_types import (
PythonRequirementCompatibleResolvesField,
PythonRequirementModulesField,
PythonRequirementsField,
PythonRequirementTypeStubModulesField,
Expand Down Expand Up @@ -173,10 +176,21 @@ async def map_first_party_python_targets_to_modules(
# Third party module mapping
# -----------------------------------------------------------------------------------------------

_ResolveName = str

class ThirdPartyPythonModuleMapping(FrozenDict[str, Tuple[ModuleProvider, ...]]):
def providers_for_module(self, module: str) -> tuple[ModuleProvider, ...]:
result = self.get(module, ())

class ThirdPartyPythonModuleMapping(
FrozenDict[_ResolveName, FrozenDict[str, Tuple[ModuleProvider, ...]]]
):
"""A mapping of each resolve to the modules they contain and the addresses providing those
modules."""

def _providers_for_resolve(self, module: str, resolve: str) -> tuple[ModuleProvider, ...]:
mapping = self.get(resolve)
if not mapping:
return ()

result = mapping.get(module, ())
if result:
return result

Expand All @@ -185,25 +199,50 @@ def providers_for_module(self, module: str) -> tuple[ModuleProvider, ...]:
if "." not in module:
return ()
parent_module = module.rsplit(".", maxsplit=1)[0]
return self.providers_for_module(parent_module)
return self._providers_for_resolve(parent_module, resolve)

def providers_for_module(
self, module: str, resolves: Iterable[str] | None
) -> tuple[ModuleProvider, ...]:
"""Find all providers for the module.

If `resolves` is None, will not consider resolves, i.e. any `python_requirement` can be
consumed. Otherwise, providers can only come from `python_requirements` marked compatible
with those resolves.
"""
if resolves is None:
resolves = list(self.keys())
Comment on lines +209 to +214
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll continue to set resolves=None if --no-python-enable-resolves is set (the default).

return tuple(
itertools.chain.from_iterable(
self._providers_for_resolve(module, resolve) for resolve in resolves
)
)


@rule(desc="Creating map of third party targets to Python modules", level=LogLevel.DEBUG)
async def map_third_party_modules_to_addresses(
all_python_tgts: AllPythonTargets,
python_setup: PythonSetup,
) -> ThirdPartyPythonModuleMapping:
modules_to_providers: DefaultDict[str, list[ModuleProvider]] = defaultdict(list)
resolves_to_modules_to_providers: dict[
_ResolveName, DefaultDict[str, list[ModuleProvider]]
] = {}

for tgt in all_python_tgts.third_party:
tgt[PythonRequirementCompatibleResolvesField].validate(python_setup)
resolves = tgt[PythonRequirementCompatibleResolvesField].value_or_default(python_setup)

def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
for module in modules:
modules_to_providers[module].append(
ModuleProvider(
tgt.address,
ModuleProviderType.TYPE_STUB if type_stub else ModuleProviderType.IMPL,
for resolve in resolves:
if resolve not in resolves_to_modules_to_providers:
resolves_to_modules_to_providers[resolve] = defaultdict(list)
for module in modules:
resolves_to_modules_to_providers[resolve][module].append(
ModuleProvider(
tgt.address,
ModuleProviderType.TYPE_STUB if type_stub else ModuleProviderType.IMPL,
)
)
)

explicit_modules = tgt.get(PythonRequirementModulesField).value
if explicit_modules:
Expand Down Expand Up @@ -240,7 +279,13 @@ def add_modules(modules: Iterable[str], *, type_stub: bool = False) -> None:
add_modules(DEFAULT_MODULE_MAPPING.get(proj_name, (fallback_value,)))

return ThirdPartyPythonModuleMapping(
(k, tuple(sorted(v))) for k, v in sorted(modules_to_providers.items())
(
resolve,
FrozenDict(
(mod, tuple(sorted(providers))) for mod, providers in sorted(mapping.items())
),
)
for resolve, mapping in sorted(resolves_to_modules_to_providers.items())
)


Expand Down Expand Up @@ -276,7 +321,7 @@ async def map_module_to_address(
third_party_mapping: ThirdPartyPythonModuleMapping,
) -> PythonModuleOwners:
providers = [
*third_party_mapping.providers_for_module(module.module),
*third_party_mapping.providers_for_module(module.module, resolves=None),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the next change: rather than using all of the resolves from compatible_resolves for a target, this will probably need to be exactly one resolve chosen by the caller: that's related to the "choose how many permutations of parameters to use" problem of #13882... but in the meantime, I think that we'll have to choose arbitrarily (first in the list or something).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, I'm still trying to internalize what that means. Should we update JVM to do that?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, I'm still trying to internalize what that means. Should we update JVM to do that?

Me too, heh. And yeah, JVM should be updated as well once we have a better idea of what it means via #13882.

Copy link
Contributor Author

@Eric-Arellano Eric-Arellano Jan 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, okay, I think an example will help me to understand this. Is this right?

# BUILD
python_requirement(
  name='colors',
  requirements=['ansicolors'],
  compatible_resolves=['a', 'b'],
)

python_requirement(
  name='tensorflow-v1',
  requirements=['tensorflow==1.0'],
  compatible_resolves=['a'],
)

python_requirement(
  name='tensorflow-v2',
  requirements=['tensorflow==2.0'],
  compatible_resolves=['b'],
)

Meaning resolves are:

  • a: [ansicolors, tensorflow-v1]
  • b: [ansicolors, tensorflow-v2]

We have f.py, where the TensorFlow import works with both 1.0 and 2.0, so we can mark the file as compatible with both resolves:

# f.py
import colors
from tensorflow import some_table_api
python_source(
  name="f",
  source="f.py",
  compatible_resolves=["a", "b"],
)

The dependency on ansicolors is fine, it's the same requirement in both resolves. But, TensorFlow is different across the two - if we matched against every compatible resolve, there would still be ambiguity, even though the caller will be able to disambiguate by choosing either "a" or "b".

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But, TensorFlow is different across the two - if we matched against every compatible resolve, there would still be ambiguity, even though the caller will be able to disambiguate by choosing either "a" or "b".

I believe that how this will need to work is that the resolve to use comes in as something baked into the Address of targets, such that f@compatible_resolve=a is a different target from f@compatible_resolve=b. How many of those there will be, and whether there are named collections of multiplexed arguments is an open question. At that point, we'd uniquely compute deps per target.

At a fundamental level, it's very similar to how you might do this with macros/target-generators... but my hope is that making the engine aware of the permutations makes it easier to configure the multiplexing and decide how many permutations to build locally vs in CI, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

such that f@compatible_resolve=a is a different target from f@compatible_resolve=b.

That makes me happy from a rule author perspective. That's easier to work with.

Although, this does introduce a problem with dependency inference for first-party targets, doesn't it? If there are two variants of f, then any import of f will be ambiguous as to which variant to use... unless we update first-party dependency inference so that you can only infer a dep on a target with your resolve? So if we have f1@{a,b} and f2@{a,b}, then f2@a infers a dep on f1@a and f2@b infers a dep on f1@b?

*first_party_mapping.providers_for_module(module.module),
]
addresses = tuple(provider.addr for provider in providers)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pants.core.util_rules import stripped_source_files
from pants.engine.addresses import Address
from pants.testutil.rule_runner import QueryRule, RuleRunner
from pants.util.frozendict import FrozenDict


def test_default_module_mapping_is_normalized() -> None:
Expand Down Expand Up @@ -124,16 +125,23 @@ def test_third_party_modules_mapping() -> None:
)
mapping = ThirdPartyPythonModuleMapping(
{
"colors": (colors_provider, colors_stubs_provider),
"pants": (pants_provider,),
"req.submodule": (submodule_provider,),
"pants.testutil": (pants_testutil_provider,),
"ambiguous": (colors_provider, pants_provider),
"default-resolve": FrozenDict(
{
"colors": (colors_provider, colors_stubs_provider),
"pants": (pants_provider,),
"req.submodule": (submodule_provider,),
"pants.testutil": (pants_testutil_provider,),
"two_resolves": (colors_provider,),
}
),
"another-resolve": FrozenDict({"two_resolves": (pants_provider,)}),
}
)

def assert_addresses(mod: str, expected: tuple[ModuleProvider, ...]) -> None:
assert mapping.providers_for_module(mod) == expected
def assert_addresses(
mod: str, expected: tuple[ModuleProvider, ...], *, resolves: list[str] | None = None
) -> None:
assert mapping.providers_for_module(mod, resolves) == expected

assert_addresses("colors", (colors_provider, colors_stubs_provider))
assert_addresses("colors.red", (colors_provider, colors_stubs_provider))
Expand All @@ -154,9 +162,20 @@ def assert_addresses(mod: str, expected: tuple[ModuleProvider, ...]) -> None:
assert_addresses("unknown", ())
assert_addresses("unknown.pants", ())

assert_addresses("ambiguous", (colors_provider, pants_provider))
assert_addresses("ambiguous.foo", (colors_provider, pants_provider))
assert_addresses("ambiguous.foo.bar", (colors_provider, pants_provider))
assert_addresses("two_resolves", (colors_provider, pants_provider), resolves=None)
assert_addresses("two_resolves.foo", (colors_provider, pants_provider), resolves=None)
assert_addresses("two_resolves.foo.bar", (colors_provider, pants_provider), resolves=None)
assert_addresses("two_resolves", (colors_provider,), resolves=["default-resolve"])
assert_addresses("two_resolves", (pants_provider,), resolves=["another-resolve"])
assert_addresses(
"two_resolves",
(
colors_provider,
pants_provider,
),
resolves=["default-resolve", "another-resolve"],
)
assert_addresses("two_resolves", (), resolves=[])


@pytest.fixture
Expand Down Expand Up @@ -280,11 +299,13 @@ def req(
*,
modules: list[str] | None = None,
stub_modules: list[str] | None = None,
resolves: list[str] | None = None,
) -> str:
return (
f"python_requirement(name='{tgt_name}', requirements=['{req_str}'], "
f"modules={modules or []},"
f"type_stub_modules={stub_modules or []})"
f"type_stub_modules={stub_modules or []},"
f"experimental_compatible_resolves={resolves or ['default']})"
)

build_file = "\n\n".join(
Expand All @@ -302,59 +323,89 @@ def req(
req("typed-dep5", "typed-dep5-foo", stub_modules=["typed_dep5"]),
# A 3rd-party dependency can have both a type stub and implementation.
req("multiple_owners1", "multiple_owners==1"),
req("multiple_owners2", "multiple_owners==2"),
req("multiple_owners_types", "types-multiple_owners==1"),
req("multiple_owners2", "multiple_owners==2", resolves=["another"]),
req("multiple_owners_types", "types-multiple_owners==1", resolves=["another"]),
# Only assume it's a type stubs dep if we are certain it's not an implementation.
req("looks_like_stubs", "looks-like-stubs-types", modules=["looks_like_stubs"]),
]
)
rule_runner.write_files({"BUILD": build_file})
rule_runner.set_options(["--python-experimental-resolves={'default': '', 'another': ''}"])
result = rule_runner.request(ThirdPartyPythonModuleMapping, [])
assert result == ThirdPartyPythonModuleMapping(
{
"file_dist": (
ModuleProvider(Address("", target_name="file_dist"), ModuleProviderType.IMPL),
),
"looks_like_stubs": (
ModuleProvider(
Address("", target_name="looks_like_stubs"), ModuleProviderType.IMPL
),
),
"mapped_module": (
ModuleProvider(Address("", target_name="modules"), ModuleProviderType.IMPL),
),
"multiple_owners": (
ModuleProvider(
Address("", target_name="multiple_owners1"), ModuleProviderType.IMPL
),
ModuleProvider(
Address("", target_name="multiple_owners2"), ModuleProviderType.IMPL
),
ModuleProvider(
Address("", target_name="multiple_owners_types"), ModuleProviderType.TYPE_STUB
),
),
"req1": (ModuleProvider(Address("", target_name="req1"), ModuleProviderType.IMPL),),
"typed_dep1": (
ModuleProvider(Address("", target_name="typed-dep1"), ModuleProviderType.TYPE_STUB),
),
"typed_dep2": (
ModuleProvider(Address("", target_name="typed-dep2"), ModuleProviderType.TYPE_STUB),
),
"typed_dep3": (
ModuleProvider(Address("", target_name="typed-dep3"), ModuleProviderType.TYPE_STUB),
),
"typed_dep4": (
ModuleProvider(Address("", target_name="typed-dep4"), ModuleProviderType.TYPE_STUB),
),
"typed_dep5": (
ModuleProvider(Address("", target_name="typed-dep5"), ModuleProviderType.TYPE_STUB),
),
"un_normalized_project": (
ModuleProvider(Address("", target_name="un_normalized"), ModuleProviderType.IMPL),
"another": FrozenDict(
{
"multiple_owners": (
ModuleProvider(
Address("", target_name="multiple_owners2"), ModuleProviderType.IMPL
),
ModuleProvider(
Address("", target_name="multiple_owners_types"),
ModuleProviderType.TYPE_STUB,
),
),
}
),
"vcs_dist": (
ModuleProvider(Address("", target_name="vcs_dist"), ModuleProviderType.IMPL),
"default": FrozenDict(
{
"file_dist": (
ModuleProvider(
Address("", target_name="file_dist"), ModuleProviderType.IMPL
),
),
"looks_like_stubs": (
ModuleProvider(
Address("", target_name="looks_like_stubs"), ModuleProviderType.IMPL
),
),
"mapped_module": (
ModuleProvider(Address("", target_name="modules"), ModuleProviderType.IMPL),
),
"multiple_owners": (
ModuleProvider(
Address("", target_name="multiple_owners1"), ModuleProviderType.IMPL
),
),
"req1": (
ModuleProvider(Address("", target_name="req1"), ModuleProviderType.IMPL),
),
"typed_dep1": (
ModuleProvider(
Address("", target_name="typed-dep1"), ModuleProviderType.TYPE_STUB
),
),
"typed_dep2": (
ModuleProvider(
Address("", target_name="typed-dep2"), ModuleProviderType.TYPE_STUB
),
),
"typed_dep3": (
ModuleProvider(
Address("", target_name="typed-dep3"), ModuleProviderType.TYPE_STUB
),
),
"typed_dep4": (
ModuleProvider(
Address("", target_name="typed-dep4"), ModuleProviderType.TYPE_STUB
),
),
"typed_dep5": (
ModuleProvider(
Address("", target_name="typed-dep5"), ModuleProviderType.TYPE_STUB
),
),
"un_normalized_project": (
ModuleProvider(
Address("", target_name="un_normalized"), ModuleProviderType.IMPL
),
),
"vcs_dist": (
ModuleProvider(
Address("", target_name="vcs_dist"), ModuleProviderType.IMPL
),
),
}
),
}
)
Expand Down
8 changes: 4 additions & 4 deletions src/python/pants/backend/python/goals/lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.target_types import (
EntryPoint,
PythonCompatibleResolvesField,
PythonRequirementCompatibleResolvesField,
PythonRequirementsField,
UnrecognizedResolveNamesError,
)
Expand Down Expand Up @@ -273,10 +273,10 @@ async def setup_user_lockfile_requests(

resolve_to_requirements_fields = defaultdict(set)
for tgt in all_targets:
if not tgt.has_field(PythonCompatibleResolvesField):
if not tgt.has_field(PythonRequirementCompatibleResolvesField):
continue
tgt[PythonCompatibleResolvesField].validate(python_setup)
for resolve in tgt[PythonCompatibleResolvesField].value_or_default(python_setup):
tgt[PythonRequirementCompatibleResolvesField].validate(python_setup)
for resolve in tgt[PythonRequirementCompatibleResolvesField].value_or_default(python_setup):
resolve_to_requirements_fields[resolve].add(tgt[PythonRequirementsField])

# TODO: Figure out how to determine which interpreter constraints to use for each resolve...
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/backend/python/target_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ class PythonRequirementTarget(Target):
PythonRequirementsField,
PythonRequirementModulesField,
PythonRequirementTypeStubModulesField,
PythonCompatibleResolvesField,
PythonRequirementCompatibleResolvesField,
)
help = (
"A Python requirement installable by pip.\n\n"
Expand Down