Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port Field to Rust #19143

Merged
merged 4 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/python/pants/backend/awslambda/python/rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def test_layer_must_have_dependencies(rule_runner: PythonRuleRunner) -> None:
{"BUILD": "python_aws_lambda_layer(name='lambda', runtime='python3.7')"}
)
with pytest.raises(
ExecutionError, match="The 'dependencies' field in target //:lambda must be defined"
ExecutionError, match="The `dependencies` field in target //:lambda must be defined"
):
create_python_awslambda(
rule_runner,
Expand Down
23 changes: 12 additions & 11 deletions src/python/pants/backend/javascript/goals/test_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
JSTestBatchCompatibilityTagField,
JSTestExtraEnvVarsField,
)
from pants.build_graph.address import Address
from pants.testutil.rule_runner import MockGet, run_rule_with_mocks


Expand All @@ -34,13 +35,13 @@ def given_field_set(


def test_batches_are_seperated_on_owning_packages() -> None:
field_set_1 = given_field_set(sentinel.address_1, batch_compatibility_tag="default")
field_set_2 = given_field_set(sentinel.address_2, batch_compatibility_tag="default")
field_set_1 = given_field_set(Address("1"), batch_compatibility_tag="default")
field_set_2 = given_field_set(Address("2"), batch_compatibility_tag="default")

request = JSTestRequest.PartitionRequest(field_sets=(field_set_1, field_set_2))

def mocked_owning_node_package(r: OwningNodePackageRequest) -> Any:
if r.address == sentinel.address_1:
if r.address == Address("1"):
return OwningNodePackage(sentinel.owning_package_1)
else:
return OwningNodePackage(sentinel.owning_package_2)
Expand All @@ -60,20 +61,20 @@ def mocked_owning_node_package(r: OwningNodePackageRequest) -> Any:
"field_set_1, field_set_2",
[
pytest.param(
given_field_set(sentinel.address_1, batch_compatibility_tag="1"),
given_field_set(sentinel.address_2, batch_compatibility_tag="2"),
given_field_set(Address("1"), batch_compatibility_tag="1"),
given_field_set(Address("2"), batch_compatibility_tag="2"),
id="compatibility_tag",
),
pytest.param(
given_field_set(sentinel.address_1, batch_compatibility_tag="default"),
given_field_set(Address("1"), batch_compatibility_tag="default"),
given_field_set(
sentinel.address_2, batch_compatibility_tag="default", env_vars=("NODE_ENV=dev",)
Address("2"), batch_compatibility_tag="default", env_vars=("NODE_ENV=dev",)
),
id="extra_env_vars",
),
pytest.param(
given_field_set(sentinel.address_1, batch_compatibility_tag=None),
given_field_set(sentinel.address_2, batch_compatibility_tag=None),
given_field_set(Address("1"), batch_compatibility_tag=None),
given_field_set(Address("2"), batch_compatibility_tag=None),
id="no_compatibility_tag",
),
],
Expand All @@ -96,9 +97,9 @@ def mocked_owning_node_package(_: OwningNodePackageRequest) -> Any:


def test_batches_are_the_same_for_same_compat_and_package() -> None:
field_set_1 = given_field_set(sentinel.address_1, batch_compatibility_tag="default")
field_set_1 = given_field_set(Address("1"), batch_compatibility_tag="default")

field_set_2 = given_field_set(sentinel.address_2, batch_compatibility_tag="default")
field_set_2 = given_field_set(Address("2"), batch_compatibility_tag="default")
request = JSTestRequest.PartitionRequest(field_sets=(field_set_1, field_set_2))

def mocked_owning_node_package(_: OwningNodePackageRequest) -> Any:
Expand Down
101 changes: 100 additions & 1 deletion src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ from __future__ import annotations
from io import RawIOBase
from typing import (
Any,
Callable,
ClassVar,
FrozenSet,
Generic,
Iterable,
Expand Down Expand Up @@ -35,7 +37,7 @@ class PyFailure:
def get_error(self) -> Exception | None: ...

# ------------------------------------------------------------------------------
# Address (parsing)
# Address
# ------------------------------------------------------------------------------

BANNED_CHARS_IN_TARGET_NAME: FrozenSet
Expand Down Expand Up @@ -248,6 +250,103 @@ class PyExecutor:
def to_borrowed(self) -> PyExecutor: ...
def shutdown(self, duration_secs: float) -> None: ...

# ------------------------------------------------------------------------------
# Target
# ------------------------------------------------------------------------------

# Type alias to express the intent that the type should be immutable and hashable. There's nothing
# to actually enforce this, outside of convention.
ImmutableValue = Any
Comment on lines +257 to +259
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI The Python std library (and therefore core devs) have roughly said that hashability implies immutability. Search https://docs.python.org/3/library/dataclasses.html for "mutability".

So in that regard, this could be an alias for typing.Hashable

Copy link
Member Author

@stuhood stuhood Jun 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's potentially backwards incompatible... but assuming that mypy doesn't misbehave, it should only catch bugs? Will try it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That causes a number of type checking errors in our code (and more in user code most likely): will save for another change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What were the errors? That's concerning...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't look into it, but there were at least 10.

My goal with these port PRs is not to change behavior (unless it makes it easier to accomplish them), since my feeling right now is that getting out of our performance hole is the most important goal.


class _NoValue:
def __bool__(self) -> bool:
"""NB: Always returns `False`."""
...
def __repr__(self) -> str: ...

# Marker for unspecified field values that should use the default value if applicable.
NO_VALUE: _NoValue

class Field:
"""A Field.

The majority of fields should use field templates like `BoolField`, `StringField`, and
`StringSequenceField`. These subclasses will provide sensible type hints and validation
automatically.

If you are directly subclassing `Field`, you should likely override `compute_value()`
to perform any custom hydration and/or validation, such as converting unhashable types to
hashable types or checking for banned values. The returned value must be hashable
(and should be immutable) so that this Field may be used by the engine. This means, for
example, using tuples rather than lists and using `FrozenOrderedSet` rather than `set`.

If you plan to use the engine to fully hydrate the value, you can also inherit
`AsyncFieldMixin`, which will store an `address: Address` property on the `Field` instance.

Subclasses should also override the type hints for `value` and `raw_value` to be more precise
than `Any`. The type hint for `raw_value` is used to generate documentation, e.g. for
`./pants help $target_type`.

Set the `help` class property with a description, which will be used in `./pants help`. For the
best rendering, use soft wrapping (e.g. implicit string concatenation) within paragraphs, but
hard wrapping (`\n`) to separate distinct paragraphs and/or lists.

Example:

# NB: Really, this should subclass IntField. We only use Field as an example.
class Timeout(Field):
alias = "timeout"
value: Optional[int]
default = None
help = "A timeout field.\n\nMore information."

@classmethod
def compute_value(cls, raw_value: Optional[int], address: Address) -> Optional[int]:
value_or_default = super().compute_value(raw_value, address=address)
if value_or_default is not None and not isinstance(value_or_default, int):
raise ValueError(
"The `timeout` field expects an integer, but was given"
f"{value_or_default} for target {address}."
)
return value_or_default
"""

# Opt-in per field class to use a "no value" marker for the `raw_value` in `compute_value()` in
# case the field was not represented in the BUILD file.
#
# This will allow users to provide `None` as the field value (when applicable) without getting
# the fields default value.
none_is_valid_value: ClassVar[bool] = False

# Subclasses must define these.
alias: ClassVar[str]
help: ClassVar[str | Callable[[], str]]

# Subclasses must define at least one of these two.
default: ClassVar[ImmutableValue]
required: ClassVar[bool] = False

# Subclasses may define these.
removal_version: ClassVar[str | None] = None
removal_hint: ClassVar[str | None] = None

deprecated_alias: ClassVar[str | None] = None
deprecated_alias_removal_version: ClassVar[str | None] = None

value: ImmutableValue | None

def __init__(self, raw_value: Any | None, address: Address) -> None: ...
@classmethod
def compute_value(cls, raw_value: Any | None, address: Address) -> ImmutableValue:
"""Convert the `raw_value` into `self.value`.

You should perform any optional validation and/or hydration here. For example, you may want
to check that an integer is > 0 or convert an `Iterable[str]` to `List[str]`.

The resulting value must be hashable (and should be immutable).
"""
...

# ------------------------------------------------------------------------------
# FS
# ------------------------------------------------------------------------------
Expand Down
158 changes: 20 additions & 138 deletions src/python/pants/engine/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
get_type_hints,
)

from typing_extensions import Protocol, final
from typing_extensions import Protocol, Self, final

from pants.base.deprecated import warn_or_error
from pants.engine.addresses import Address, Addresses, UnparsedAddressInputs, assert_single_address
Expand All @@ -57,6 +57,8 @@
DependencyRuleActionDeniedError,
DependencyRuleApplication,
)
from pants.engine.internals.native_engine import NO_VALUE as NO_VALUE # noqa: F401
from pants.engine.internals.native_engine import Field as Field # noqa: F401
from pants.engine.unions import UnionMembership, UnionRule, distinct_union_type_per_subclass, union
from pants.option.global_options import UnmatchedBuildFileGlobs
from pants.source.filespec import Filespec, FilespecMatcher
Expand All @@ -79,141 +81,8 @@
ImmutableValue = Any


class _NoValue:
def __bool__(self) -> bool:
return False

def __repr__(self) -> str:
return "<NO_VALUE>"


# Marker for unspecified field values that should use the default value if applicable.
NO_VALUE = _NoValue()


@dataclass(frozen=True)
class Field:
"""A Field.

The majority of fields should use field templates like `BoolField`, `StringField`, and
`StringSequenceField`. These subclasses will provide sensible type hints and validation
automatically.

If you are directly subclassing `Field`, you should likely override `compute_value()`
to perform any custom hydration and/or validation, such as converting unhashable types to
hashable types or checking for banned values. The returned value must be hashable
(and should be immutable) so that this Field may be used by the engine. This means, for
example, using tuples rather than lists and using `FrozenOrderedSet` rather than `set`.

If you plan to use the engine to fully hydrate the value, you can also inherit
`AsyncFieldMixin`, which will store an `address: Address` property on the `Field` instance.

Subclasses should also override the type hints for `value` and `raw_value` to be more precise
than `Any`. The type hint for `raw_value` is used to generate documentation, e.g. for
`./pants help $target_type`.

Set the `help` class property with a description, which will be used in `./pants help`. For the
best rendering, use soft wrapping (e.g. implicit string concatenation) within paragraphs, but
hard wrapping (`\n`) to separate distinct paragraphs and/or lists.

Example:

# NB: Really, this should subclass IntField. We only use Field as an example.
class Timeout(Field):
alias = "timeout"
value: Optional[int]
default = None
help = "A timeout field.\n\nMore information."

@classmethod
def compute_value(cls, raw_value: Optional[int], address: Address) -> Optional[int]:
value_or_default = super().compute_value(raw_value, address=address)
if value_or_default is not None and not isinstance(value_or_default, int):
raise ValueError(
"The `timeout` field expects an integer, but was given"
f"{value_or_default} for target {address}."
)
return value_or_default
"""

# Opt-in per field class to use a "no value" marker for the `raw_value` in `compute_value()` in
# case the field was not represented in the BUILD file.
#
# This will allow users to provide `None` as the field value (when applicable) without getting
# the fields default value.
none_is_valid_value: ClassVar[bool] = False

# Subclasses must define these.
alias: ClassVar[str]
help: ClassVar[str | Callable[[], str]]

# Subclasses must define at least one of these two.
default: ClassVar[ImmutableValue]
required: ClassVar[bool] = False

# Subclasses may define these.
removal_version: ClassVar[str | None] = None
removal_hint: ClassVar[str | None] = None

deprecated_alias: ClassVar[str | None] = None
deprecated_alias_removal_version: ClassVar[str | None] = None

value: Optional[ImmutableValue]

@final
def __init__(self, raw_value: Optional[Any], address: Address) -> None:
if raw_value is NO_VALUE and not self.none_is_valid_value:
raw_value = None
self._check_deprecated(raw_value, address)

object.__setattr__(self, "value", self.compute_value(raw_value, address))

@classmethod
def compute_value(cls, raw_value: Optional[Any], address: Address) -> ImmutableValue:
"""Convert the `raw_value` into `self.value`.

You should perform any optional validation and/or hydration here. For example, you may want
to check that an integer is > 0 or convert an `Iterable[str]` to `List[str]`.

The resulting value must be hashable (and should be immutable).
"""
if raw_value is (NO_VALUE if cls.none_is_valid_value else None):
if cls.required:
raise RequiredFieldMissingException(address, cls.alias)
return cls.default
return raw_value

@classmethod
def _check_deprecated(cls, raw_value: Optional[Any], address: Address) -> None:
if not cls.removal_version or address.is_generated_target or raw_value in (NO_VALUE, None):
return
if not cls.removal_hint:
raise ValueError(
f"You specified `removal_version` for {cls}, but not the class property "
"`removal_hint`."
)
warn_or_error(
cls.removal_version,
entity=f"the {repr(cls.alias)} field",
hint=(f"Using the `{cls.alias}` field in the target {address}. {cls.removal_hint}"),
)

def __repr__(self) -> str:
params = [f"alias={self.alias!r}", f"value={self.value!r}"]
if hasattr(self, "default"):
params.append(f"default={self.default!r}")
return f"{self.__class__}({', '.join(params)})"

def __str__(self) -> str:
return f"{self.alias}={self.value}"

def __hash__(self) -> int:
return hash((self.__class__, self.value))


# NB: By subclassing `Field`, MyPy understands our type hints, and it means it doesn't matter which
# order you use for inheriting the field template vs. the mixin.
@dataclass(frozen=True)
class AsyncFieldMixin(Field):
"""A mixin to store the field's original `Address` for use during hydration by the engine.

Expand Down Expand Up @@ -262,13 +131,14 @@ def hydrate_sources(request: HydrateSourcesRequest) -> HydratedSources:

address: Address

@final # type: ignore[misc]
def __init__(self, raw_value: Optional[Any], address: Address) -> None:
@final
def __new__(cls, raw_value: Optional[Any], address: Address) -> Self:
obj = super().__new__(cls, raw_value, address) # type: ignore[call-arg]
# N.B.: We store the address here and not in the Field base class, because the memory usage
# of storing this value in every field was shown to be excessive / lead to performance
# issues.
object.__setattr__(self, "address", address)
super().__init__(raw_value, address)
object.__setattr__(obj, "address", address)
return obj

def __repr__(self) -> str:
params = [
Expand All @@ -283,6 +153,18 @@ def __repr__(self) -> str:
def __hash__(self) -> int:
return hash((self.__class__, self.value, self.address))

def __eq__(self, other: Any) -> bool:
if not isinstance(other, AsyncFieldMixin):
return False
return (
self.__class__ == other.__class__
and self.value == other.value
and self.address == other.address
)

def __ne__(self, other: Any) -> bool:
return not (self == other)


@union
@dataclass(frozen=True)
Expand Down
Loading