From 3abe29afab9fa625b08281e188761c035fa0f8e4 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 24 May 2023 06:48:01 +1000 Subject: [PATCH] Implement layout="zip" for Lambda/GCF, deprecating lambdex (#19076) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes #18879 by allowing the `python_awslambda` and `python_google_cloud_function` FaaS artefacts to be generated in "simple" format, using the `pex3 venv create --layout=flat-zipped` functionality recently added in PEX 2.1.135 (https://github.com/pantsbuild/pex/releases/tag/v2.1.135). This format is just: put everything at the top-level. For instance, the zip contains `cowsay/__init__.py` etc., rather than `.deps/cowsay-....whl`. This avoids the need to do the dynamic PEX initialisation/venv creation. This shifts the dynamic dependency computation/extraction/layout from run-time to build-time, relying on the FaaS environment to be generally consistent. It shouldn't change what actually happens after initialisation. This can: - reduce cold-starts noticeably: for instance, some of our lambdas spend 1s doing PEX/Lambdex start up. - reduce package size somewhat (the PEX `.bootstrap/` folder seems to be about 2MB uncompressed, ~1MB compressed). - increase build times. For instance, for one Python 3.9 Lambda in our codebase: | metric | before | after | |---|---|---| | init time on cold start | 2.3-2.5s | 1.3-1.4s (-1s) | | compressed size | 24.6MB | 23.8MB (-0.8MB) | | uncompressed size | 117.8MB | 115.8MB (-2.0MB) | | PEX-construction build time | ~5s | ~5s | | PEX-postprocessing build time | 0.14s | 4.8s | (The PEX-postprocessing time metric is specifically the time to run the `Setting up handler` (lambdex) or `Build python_awslambda` (`pex3 venv create`) process, computed by running `pants --keep-sandboxes=always package ...` for each layout, and then `hyperfine -r3 -w1 path/to/first/__run.sh path/to/second/__run.sh`. 
This _doesn't_ include the time to construct the input PEX, which is the same for both.) --- This functionality is driven by adding a new value to the `[lambdex].layout` option added in #19074. In #19074 (targeted for 2.17), it defaults to `lambdex` (retaining the current code paths). This PR adds the new value `zip`, which keys into the functionality above; the default stays `lambdex` for now, with a deprecation warning when it is left implicit (see the phasing table below). I've tried to keep the non-lambdex implementation generally separate from the lambdex one, rather than reusing all of the code that happens to be common currently, because it'd make sense to deprecate/remove the lambdex functionality and thus I feel it's best for this new functionality to be mostly a fresh start. This PR's commits can be reviewed independently. I _think_ this is an acceptable MVP for this functionality, but there are various bits of follow-up: - add a warning about `files` being loaded into these packages, which has been temporarily lost (#19027) - adjust documentation #19067 - other improvements like #18195 and #18880 - improve performance, e.g. potentially `pex3 venv create ...` could use the lock file and sources to directly compute the appropriate files, without having to materialise a normal pex first This is a re-doing of #19022 with a simpler approach to deprecation, as discussed in https://github.com/pantsbuild/pants/pull/19074#discussion_r1199684733 and https://github.com/pantsbuild/pants/pull/19032#discussion_r1199622348. The phasing will be: | release | supports lambdex? | supports zip? 
| default layout | deprecation warnings | |---|---|---|---|---| | 2.17 (this PR) | ✅ | ✅ | lambdex | if `layout = "lambdex"` is implicit, tell people to set it: recommend `zip`, but allow `lambdex` if they have to | | 2.18 | ✅ | ✅ | zip | if `layout = "lambdex"` is set at all, tell people to remove it and switch to `zip` | | 2.19 | ❌ | ✅ | zip | none, migration over (or maybe just about removing the `[lambdex]` section entirely) | --- .../pants/backend/awslambda/python/rules.py | 39 +++- .../backend/awslambda/python/rules_test.py | 69 +++++- .../backend/awslambda/python/target_types.py | 9 + .../google_cloud_function/python/rules.py | 52 +++-- .../python/rules_test.py | 44 +++- .../python/target_types.py | 8 + .../backend/python/subsystems/lambdex.py | 48 +++++ .../pants/backend/python/util_rules/faas.py | 159 ++++++++++++-- .../backend/python/util_rules/faas_test.py | 23 +- .../backend/python/util_rules/pex_venv.py | 92 ++++++++ .../python/util_rules/pex_venv_test.py | 198 ++++++++++++++++++ 11 files changed, 687 insertions(+), 54 deletions(-) create mode 100644 src/python/pants/backend/python/util_rules/pex_venv.py create mode 100644 src/python/pants/backend/python/util_rules/pex_venv_test.py diff --git a/src/python/pants/backend/awslambda/python/rules.py b/src/python/pants/backend/awslambda/python/rules.py index 17b1165fe9b..54589a4aa05 100644 --- a/src/python/pants/backend/awslambda/python/rules.py +++ b/src/python/pants/backend/awslambda/python/rules.py @@ -12,8 +12,13 @@ PythonAwsLambdaIncludeRequirements, PythonAwsLambdaRuntime, ) -from pants.backend.python.util_rules import pex_from_targets -from pants.backend.python.util_rules.faas import BuildLambdexRequest, PythonFaaSCompletePlatforms +from pants.backend.python.subsystems.lambdex import Lambdex, LambdexLayout +from pants.backend.python.util_rules.faas import ( + BuildLambdexRequest, + BuildPythonFaaSRequest, + PythonFaaSCompletePlatforms, +) +from pants.backend.python.util_rules.faas import rules as 
faas_rules from pants.core.goals.package import BuiltPackage, OutputPathField, PackageFieldSet from pants.core.util_rules.environments import EnvironmentField from pants.engine.rules import Get, collect_rules, rule @@ -38,10 +43,30 @@ class PythonAwsLambdaFieldSet(PackageFieldSet): @rule(desc="Create Python AWS Lambda", level=LogLevel.DEBUG) async def package_python_awslambda( field_set: PythonAwsLambdaFieldSet, + lambdex: Lambdex, ) -> BuiltPackage: + if lambdex.layout is LambdexLayout.LAMBDEX: + return await Get( + BuiltPackage, + BuildLambdexRequest( + address=field_set.address, + target_name=PythonAWSLambda.alias, + complete_platforms=field_set.complete_platforms, + runtime=field_set.runtime, + handler=field_set.handler, + output_path=field_set.output_path, + include_requirements=field_set.include_requirements.value, + script_handler=None, + script_module=None, + # The AWS-facing handler function is always lambdex_handler.handler, which is the + # wrapper injected by lambdex that manages invocation of the actual handler. + handler_log_message="lambdex_handler.handler", + ), + ) + return await Get( BuiltPackage, - BuildLambdexRequest( + BuildPythonFaaSRequest( address=field_set.address, target_name=PythonAWSLambda.alias, complete_platforms=field_set.complete_platforms, @@ -49,11 +74,7 @@ async def package_python_awslambda( handler=field_set.handler, output_path=field_set.output_path, include_requirements=field_set.include_requirements.value, - script_handler=None, - script_module=None, - # The AWS-facing handler function is always lambdex_handler.handler, which is the - # wrapper injected by lambdex that manages invocation of the actual handler. 
- handler_log_message="lambdex_handler.handler", + reexported_handler_module=PythonAwsLambdaHandlerField.reexported_handler_module, ), ) @@ -62,5 +83,5 @@ def rules(): return [ *collect_rules(), UnionRule(PackageFieldSet, PythonAwsLambdaFieldSet), - *pex_from_targets.rules(), + *faas_rules(), ] diff --git a/src/python/pants/backend/awslambda/python/rules_test.py b/src/python/pants/backend/awslambda/python/rules_test.py index 5125a99e749..26312caffb9 100644 --- a/src/python/pants/backend/awslambda/python/rules_test.py +++ b/src/python/pants/backend/awslambda/python/rules_test.py @@ -122,7 +122,7 @@ def complete_platform(rule_runner: PythonRuleRunner) -> bytes: "major_minor_interpreter", all_major_minor_python_versions(Lambdex.default_interpreter_constraints), ) -def test_create_hello_world_lambda( +def test_create_hello_world_lambda_with_lambdex( rule_runner: PythonRuleRunner, major_minor_interpreter: str, complete_platform: str, caplog ) -> None: rule_runner.write_files( @@ -197,7 +197,7 @@ def handler(event, context): ), "Using include_requirements=False should exclude third-party deps" -def test_warn_files_targets(rule_runner: PythonRuleRunner, caplog) -> None: +def test_warn_files_targets_with_lambdex(rule_runner: PythonRuleRunner, caplog) -> None: rule_runner.write_files( { "assets/f.txt": "", @@ -257,3 +257,68 @@ def handler(event, context): assert "assets/f.txt:files" in caplog.text assert "assets:relocated" in caplog.text assert "assets:resources" not in caplog.text + + +def test_create_hello_world_lambda(rule_runner: PythonRuleRunner) -> None: + rule_runner.write_files( + { + "src/python/foo/bar/hello_world.py": dedent( + """ + import mureq + + def handler(event, context): + print('Hello, World!') + """ + ), + "src/python/foo/bar/BUILD": dedent( + """ + python_requirement(name="mureq", requirements=["mureq==0.2"]) + python_sources() + + python_awslambda( + name='lambda', + handler='foo.bar.hello_world:handler', + runtime="python3.7", + ) + python_awslambda( 
+ name='slimlambda', + include_requirements=False, + handler='foo.bar.hello_world:handler', + runtime="python3.7", + ) + """ + ), + } + ) + + zip_file_relpath, content = create_python_awslambda( + rule_runner, + Address("src/python/foo/bar", target_name="lambda"), + expected_extra_log_lines=(" Handler: lambda_function.handler",), + extra_args=["--lambdex-layout=zip"], + ) + assert "src.python.foo.bar/lambda.zip" == zip_file_relpath + + zipfile = ZipFile(BytesIO(content)) + names = set(zipfile.namelist()) + assert "mureq/__init__.py" in names + assert "foo/bar/hello_world.py" in names + assert ( + zipfile.read("lambda_function.py") == b"from foo.bar.hello_world import handler as handler" + ) + + zip_file_relpath, content = create_python_awslambda( + rule_runner, + Address("src/python/foo/bar", target_name="slimlambda"), + expected_extra_log_lines=(" Handler: lambda_function.handler",), + extra_args=["--lambdex-layout=zip"], + ) + assert "src.python.foo.bar/slimlambda.zip" == zip_file_relpath + + zipfile = ZipFile(BytesIO(content)) + names = set(zipfile.namelist()) + assert "mureq/__init__.py" not in names + assert "foo/bar/hello_world.py" in names + assert ( + zipfile.read("lambda_function.py") == b"from foo.bar.hello_world import handler as handler" + ) diff --git a/src/python/pants/backend/awslambda/python/target_types.py b/src/python/pants/backend/awslambda/python/target_types.py index 477518fe33a..3a8a54b4832 100644 --- a/src/python/pants/backend/awslambda/python/target_types.py +++ b/src/python/pants/backend/awslambda/python/target_types.py @@ -29,11 +29,20 @@ class PythonAwsLambdaHandlerField(PythonFaaSHandlerField): + # This doesn't matter (just needs to be fixed), but is the default name used by the AWS + # console when creating a Python lambda, so is as good as any + # https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html + reexported_handler_module = "lambda_function" + help = help_text( f""" Entry point to the AWS Lambda handler. 
{PythonFaaSHandlerField.help} + + This is re-exported at `{reexported_handler_module}.handler` in the resulting package to be + used as the configured handler of the Lambda in AWS. It can also be accessed under its + source-root-relative module path, for example: `path.to.module.handler_func`. """ ) diff --git a/src/python/pants/backend/google_cloud_function/python/rules.py b/src/python/pants/backend/google_cloud_function/python/rules.py index 6314471d8d0..783c6700898 100644 --- a/src/python/pants/backend/google_cloud_function/python/rules.py +++ b/src/python/pants/backend/google_cloud_function/python/rules.py @@ -12,8 +12,13 @@ PythonGoogleCloudFunctionRuntime, PythonGoogleCloudFunctionType, ) -from pants.backend.python.util_rules import pex_from_targets -from pants.backend.python.util_rules.faas import BuildLambdexRequest, PythonFaaSCompletePlatforms +from pants.backend.python.subsystems.lambdex import Lambdex, LambdexLayout +from pants.backend.python.util_rules.faas import ( + BuildLambdexRequest, + BuildPythonFaaSRequest, + PythonFaaSCompletePlatforms, +) +from pants.backend.python.util_rules.faas import rules as faas_rules from pants.core.goals.package import BuiltPackage, OutputPathField, PackageFieldSet from pants.core.util_rules.environments import EnvironmentField from pants.engine.rules import Get, collect_rules, rule @@ -38,10 +43,36 @@ class PythonGoogleCloudFunctionFieldSet(PackageFieldSet): @rule(desc="Create Python Google Cloud Function", level=LogLevel.DEBUG) async def package_python_google_cloud_function( field_set: PythonGoogleCloudFunctionFieldSet, + lambdex: Lambdex, ) -> BuiltPackage: + if lambdex.layout is LambdexLayout.LAMBDEX: + return await Get( + BuiltPackage, + BuildLambdexRequest( + address=field_set.address, + target_name=PythonGoogleCloudFunction.alias, + complete_platforms=field_set.complete_platforms, + runtime=field_set.runtime, + handler=field_set.handler, + output_path=field_set.output_path, + include_requirements=True, + # The 
GCP-facing handler function is always `main.handler` (We pass `-M main.py -H handler` to + # Lambdex to ensure this), which is the wrapper injected by Lambdex that manages invocation of + # the actual user-supplied handler function. This arrangement works well since GCF assumes the + # handler function is housed in `main.py` in the root of the zip (you can re-direct this by + # setting a `GOOGLE_FUNCTION_SOURCE` Google Cloud build environment variable; e.g.: + # `gcloud functions deploy {--build-env-vars-file,--set-build-env-vars}`, but it's non-trivial + # to do this right or with intended effect) and the handler name you configure GCF with is just + # the unqualified function name, which we log here. + script_handler="handler", + script_module="main.py", + handler_log_message="handler", + ), + ) + return await Get( BuiltPackage, - BuildLambdexRequest( + BuildPythonFaaSRequest( address=field_set.address, target_name=PythonGoogleCloudFunction.alias, complete_platforms=field_set.complete_platforms, @@ -49,17 +80,8 @@ async def package_python_google_cloud_function( handler=field_set.handler, output_path=field_set.output_path, include_requirements=True, - # The GCP-facing handler function is always `main.handler` (We pass `-M main.py -H handler` to - # Lambdex to ensure this), which is the wrapper injected by Lambdex that manages invocation of - # the actual user-supplied handler function. This arrangement works well since GCF assumes the - # handler function is housed in `main.py` in the root of the zip (you can re-direct this by - # setting a `GOOGLE_FUNCTION_SOURCE` Google Cloud build environment variable; e.g.: - # `gcloud functions deploy {--build-env-vars-file,--set-build-env-vars}`, but it's non-trivial - # to do this right or with intended effect) and the handler name you configure GCF with is just - # the unqualified function name, which we log here. 
- script_handler="handler", - script_module="main.py", - handler_log_message="handler", + reexported_handler_module=PythonGoogleCloudFunctionHandlerField.reexported_handler_module, + log_only_reexported_handler_func=True, ), ) @@ -68,5 +90,5 @@ def rules(): return [ *collect_rules(), UnionRule(PackageFieldSet, PythonGoogleCloudFunctionFieldSet), - *pex_from_targets.rules(), + *faas_rules(), ] diff --git a/src/python/pants/backend/google_cloud_function/python/rules_test.py b/src/python/pants/backend/google_cloud_function/python/rules_test.py index 8d796dade93..e74336ce26c 100644 --- a/src/python/pants/backend/google_cloud_function/python/rules_test.py +++ b/src/python/pants/backend/google_cloud_function/python/rules_test.py @@ -130,7 +130,7 @@ def complete_platform(rule_runner: PythonRuleRunner) -> bytes: "major_minor_interpreter", all_major_minor_python_versions(Lambdex.default_interpreter_constraints), ) -def test_create_hello_world_lambda( +def test_create_hello_world_lambda_with_lambdex( rule_runner: PythonRuleRunner, major_minor_interpreter: str, complete_platform: str, caplog ) -> None: rule_runner.write_files( @@ -243,3 +243,45 @@ def handler(event, context): assert "assets/f.txt:files" in caplog.text assert "assets:relocated" in caplog.text assert "assets:resources" not in caplog.text + + +def test_create_hello_world_gcf(rule_runner: PythonRuleRunner) -> None: + rule_runner.write_files( + { + "src/python/foo/bar/hello_world.py": dedent( + """ + import mureq + + def handler(event, context): + print('Hello, World!') + """ + ), + "src/python/foo/bar/BUILD": dedent( + """ + python_requirement(name="mureq", requirements=["mureq==0.2"]) + python_sources() + + python_google_cloud_function( + name='gcf', + handler='foo.bar.hello_world:handler', + runtime="python37", + type='event', + ) + """ + ), + } + ) + + zip_file_relpath, content = create_python_google_cloud_function( + rule_runner, + Address("src/python/foo/bar", target_name="gcf"), + 
expected_extra_log_lines=(" Handler: handler",), + extra_args=["--lambdex-layout=zip"], + ) + assert "src.python.foo.bar/gcf.zip" == zip_file_relpath + + zipfile = ZipFile(BytesIO(content)) + names = set(zipfile.namelist()) + assert "mureq/__init__.py" in names + assert "foo/bar/hello_world.py" in names + assert zipfile.read("main.py") == b"from foo.bar.hello_world import handler as handler" diff --git a/src/python/pants/backend/google_cloud_function/python/target_types.py b/src/python/pants/backend/google_cloud_function/python/target_types.py index 559dcca4fc9..7267727f475 100644 --- a/src/python/pants/backend/google_cloud_function/python/target_types.py +++ b/src/python/pants/backend/google_cloud_function/python/target_types.py @@ -29,11 +29,19 @@ class PythonGoogleCloudFunctionHandlerField(PythonFaaSHandlerField): + # GCP requires "Your main file must be named main.py" + # https://cloud.google.com/functions/docs/writing#directory-structure-python + reexported_handler_module = "main" + help = help_text( f""" Entry point to the Google Cloud Function handler. {PythonFaaSHandlerField.help} + + This is re-exported at `{reexported_handler_module}.handler` in the resulting package to + used as the configured handler of the Google Cloud Function in GCP. It can also be accessed + under its source-root-relative module path, for example: `path.to.module.handler_func`. """ ) diff --git a/src/python/pants/backend/python/subsystems/lambdex.py b/src/python/pants/backend/python/subsystems/lambdex.py index 6b70ab6c3db..84b1aa51800 100644 --- a/src/python/pants/backend/python/subsystems/lambdex.py +++ b/src/python/pants/backend/python/subsystems/lambdex.py @@ -1,9 +1,19 @@ # Copyright 2019 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). 
+from enum import Enum + from pants.backend.python.subsystems.python_tool_base import LockfileRules, PythonToolBase from pants.backend.python.target_types import ConsoleScript +from pants.base.deprecated import warn_or_error from pants.engine.rules import collect_rules +from pants.option.option_types import EnumOption +from pants.util.strutil import softwrap + + +class LambdexLayout(Enum): + LAMBDEX = "lambdex" + ZIP = "zip" class Lambdex(PythonToolBase): @@ -20,6 +30,44 @@ class Lambdex(PythonToolBase): default_lockfile_resource = ("pants.backend.python.subsystems", "lambdex.lock") lockfile_rules_type = LockfileRules.SIMPLE + layout = EnumOption( + default=LambdexLayout.LAMBDEX, + help=softwrap( + """ + Explicitly control the layout used for `python_awslambda` and + `python_google_cloud_function` targets. This option exists for the transition from + Lambdex-based layout to the plain zip layout, as recommended by cloud vendors. + """ + ), + ) + + def warn_for_layout(self, target_alias: str) -> None: + if self.options.is_default("layout"): + lambda_message = ( + " (you will need to also update the handlers configured in the cloud from `lambdex_handler.handler` to `lambda_function.handler`)" + if target_alias == "python_awslambda" + else "" + ) + + warn_or_error( + "2.19.0.dev0", + f"using the Lambdex layout for `{target_alias}` targets", + softwrap( + f""" + Set the `[lambdex].layout` option explicitly to `zip` (recommended) or `lambdex` + (compatibility), in `pants.toml`. Recommended: set to `zip` to opt-in to the new + layout recommended by cloud vendors{lambda_message}: + + [lambdex] + layout = "zip" + + You can also explicitly set `layout = "lambdex"` to silence this warning and + continue using the Lambdex-based layout in this release of Pants. This layout + will disappear in future. 
+ """ + ), + ) + def rules(): return collect_rules() diff --git a/src/python/pants/backend/python/util_rules/faas.py b/src/python/pants/backend/python/util_rules/faas.py index c2eacefd16c..21ff0ecd99b 100644 --- a/src/python/pants/backend/python/util_rules/faas.py +++ b/src/python/pants/backend/python/util_rules/faas.py @@ -8,6 +8,7 @@ import os.path from abc import ABC, abstractmethod from dataclasses import dataclass +from pathlib import Path from typing import Optional, cast from pants.backend.python.dependency_inference.module_mapper import ( @@ -21,7 +22,11 @@ ) from pants.backend.python.subsystems.lambdex import Lambdex from pants.backend.python.subsystems.setup import PythonSetup -from pants.backend.python.target_types import PexCompletePlatformsField, PythonResolveField +from pants.backend.python.target_types import ( + PexCompletePlatformsField, + PexLayout, + PythonResolveField, +) from pants.backend.python.util_rules.pex import ( CompletePlatforms, Pex, @@ -31,10 +36,20 @@ VenvPexProcess, ) from pants.backend.python.util_rules.pex_from_targets import PexFromTargetsRequest +from pants.backend.python.util_rules.pex_from_targets import rules as pex_from_targets_rules +from pants.backend.python.util_rules.pex_venv import PexVenv, PexVenvLayout, PexVenvRequest +from pants.backend.python.util_rules.pex_venv import rules as pex_venv_rules from pants.core.goals.package import BuiltPackage, BuiltPackageArtifact, OutputPathField from pants.core.target_types import FileSourceField from pants.engine.addresses import Address, UnparsedAddressInputs -from pants.engine.fs import GlobMatchErrorBehavior, PathGlobs, Paths +from pants.engine.fs import ( + CreateDigest, + Digest, + FileContent, + GlobMatchErrorBehavior, + PathGlobs, + Paths, +) from pants.engine.platform import Platform from pants.engine.process import ProcessResult from pants.engine.rules import Get, MultiGet, collect_rules, rule @@ -97,7 +112,8 @@ def filespec(self) -> Filespec: @dataclass(frozen=True) 
class ResolvedPythonFaaSHandler: - val: str + module: str + func: str file_name_used: bool @@ -118,7 +134,7 @@ async def resolve_python_faas_handler( # If it's already a module, simply use that. Otherwise, convert the file name into a module # path. if not path.endswith(".py"): - return ResolvedPythonFaaSHandler(handler_val, file_name_used=False) + return ResolvedPythonFaaSHandler(module=path, func=func, file_name_used=False) # Use the engine to validate that the file exists and that it resolves to only one file. full_glob = os.path.join(address.spec_path, path) @@ -147,7 +163,7 @@ async def resolve_python_faas_handler( stripped_source_path = os.path.relpath(handler_path, source_root.path) module_base, _ = os.path.splitext(stripped_source_path) normalized_path = module_base.replace(os.path.sep, ".") - return ResolvedPythonFaaSHandler(f"{normalized_path}:{func}", file_name_used=True) + return ResolvedPythonFaaSHandler(module=normalized_path, func=func, file_name_used=True) class PythonFaaSDependencies(Dependencies): @@ -187,7 +203,6 @@ async def infer_faas_handler_dependency( ResolvePythonFaaSHandlerRequest(request.field_set.handler), ), ) - module, _, _func = handler.val.partition(":") # Only set locality if needed, to avoid unnecessary rule graph memoization misses. 
# When set, use the source root, which is useful in practice, but incurs fewer memoization @@ -202,7 +217,7 @@ async def infer_faas_handler_dependency( owners = await Get( PythonModuleOwners, PythonModuleOwnersRequest( - module, + handler.module, resolve=request.field_set.resolve.normalized_value(python_setup), locality=locality, ), @@ -218,7 +233,7 @@ async def infer_faas_handler_dependency( context=( f"The target {address} has the field " f"`handler={repr(request.field_set.handler.value)}`, which maps " - f"to the Python module `{module}`" + f"to the Python module `{handler.module}`" ), ) maybe_disambiguated = explicitly_provided_deps.disambiguated( @@ -249,6 +264,21 @@ class PythonFaaSRuntimeField(StringField, ABC): def to_interpreter_version(self) -> None | tuple[int, int]: """Returns the Python version implied by the runtime, as (major, minor).""" + def to_platform_string(self) -> None | str: + # We hardcode the platform value to the appropriate one for each FaaS runtime. + # (Running the "hello world" cloud function in the example code will report the platform, and can be + # used to verify correctness of these platform strings.) + interpreter_version = self.to_interpreter_version() + if interpreter_version is None: + return None + + py_major, py_minor = interpreter_version + platform_str = f"linux_x86_64-cp-{py_major}{py_minor}-cp{py_major}{py_minor}" + # set pymalloc ABI flag - this was removed in python 3.8 https://bugs.python.org/issue36707 + if py_major <= 3 and py_minor < 8: + platform_str += "m" + return platform_str + @rule async def digest_complete_platforms( @@ -293,24 +323,15 @@ async def build_lambdex( f" {bin_name()} package. (See https://realpython.com/python-wheels/ for more about" " wheels.)\n\n(If the build does not raise an exception, it's safe to use macOS.)" ) + lambdex.warn_for_layout(request.target_name) output_filename = request.output_path.value_or_default( # FaaS typically use the .zip suffix, so we use that instead of .pex. 
file_ending="zip", ) - # We hardcode the platform value to the appropriate one for each FaaS runtime. - # (Running the "hello world" cloud function in the example code will report the platform, and can be - # used to verify correctness of these platform strings.) - pex_platforms = [] - interpreter_version = request.runtime.to_interpreter_version() - if interpreter_version: - py_major, py_minor = interpreter_version - platform_str = f"linux_x86_64-cp-{py_major}{py_minor}-cp{py_major}{py_minor}" - # set pymalloc ABI flag - this was removed in python 3.8 https://bugs.python.org/issue36707 - if py_major <= 3 and py_minor < 8: - platform_str += "m" - pex_platforms.append(platform_str) + platform_str = request.runtime.to_platform_string() + pex_platforms = [platform_str] if platform_str else [] additional_pex_args = ( # Ensure we can resolve manylinux wheels in addition to any AMI-specific wheels. @@ -359,7 +380,7 @@ async def build_lambdex( f"\n\nFiles targets dependencies: {files_addresses}" ) - lambdex_args = ["build", "-e", handler.val, output_filename] + lambdex_args = ["build", "-e", f"{handler.module}:{handler.func}", output_filename] if request.script_handler: lambdex_args.extend(("-H", request.script_handler)) if request.script_module: @@ -393,9 +414,105 @@ async def build_lambdex( return BuiltPackage(digest=result.output_digest, artifacts=(artifact,)) +@dataclass(frozen=True) +class BuildPythonFaaSRequest: + address: Address + target_name: str + + complete_platforms: PythonFaaSCompletePlatforms + handler: PythonFaaSHandlerField + output_path: OutputPathField + runtime: PythonFaaSRuntimeField + + include_requirements: bool + + reexported_handler_module: str + log_only_reexported_handler_func: bool = False + + +@rule +async def build_python_faas( + request: BuildPythonFaaSRequest, +) -> BuiltPackage: + platform_str = request.runtime.to_platform_string() + pex_platforms = PexPlatforms([platform_str] if platform_str else []) + + additional_pex_args = ( + # Ensure 
we can resolve manylinux wheels in addition to any AMI-specific wheels. + "--manylinux=manylinux2014", + # When we're executing Pex on Linux, allow a local interpreter to be resolved if + # available and matching the AMI platform. + "--resolve-local-platforms", + ) + + complete_platforms, handler = await MultiGet( + Get(CompletePlatforms, PythonFaaSCompletePlatforms, request.complete_platforms), + Get(ResolvedPythonFaaSHandler, ResolvePythonFaaSHandlerRequest(request.handler)), + ) + + # TODO: improve diagnostics if there's more than one platform/complete_platform + + # synthesise a source file that gives a fixed handler path, no matter what the entry point is: + # some platforms require a certain name (e.g. GCF), and even on others, giving a fixed name + # means users don't need to duplicate the entry_point config in both the pants BUILD file and + # infrastructure definitions (the latter can always use the same names, for every lambda). + reexported_handler_file = f"{request.reexported_handler_module}.py" + reexported_handler_func = "handler" + reexported_handler_content = ( + f"from {handler.module} import {handler.func} as {reexported_handler_func}" + ) + additional_sources = await Get( + Digest, + CreateDigest([FileContent(reexported_handler_file, reexported_handler_content.encode())]), + ) + + repository_filename = "faas_repository.pex" + pex_request = PexFromTargetsRequest( + addresses=[request.address], + internal_only=False, + include_requirements=request.include_requirements, + output_filename=repository_filename, + platforms=pex_platforms, + complete_platforms=complete_platforms, + layout=PexLayout.PACKED, + additional_args=additional_pex_args, + additional_lockfile_args=additional_pex_args, + additional_sources=additional_sources, + ) + + pex_result = await Get(Pex, PexFromTargetsRequest, pex_request) + + output_filename = request.output_path.value_or_default(file_ending="zip") + + result = await Get( + PexVenv, + PexVenvRequest( + pex=pex_result, + 
layout=PexVenvLayout.FLAT_ZIPPED, + platforms=pex_platforms, + complete_platforms=complete_platforms, + output_path=Path(output_filename), + description=f"Build {request.target_name} artifact for {request.address}", + ), + ) + + if request.log_only_reexported_handler_func: + handler_text = reexported_handler_func + else: + handler_text = f"{request.reexported_handler_module}.{reexported_handler_func}" + + artifact = BuiltPackageArtifact( + output_filename, + extra_log_lines=(f" Handler: {handler_text}",), + ) + return BuiltPackage(digest=result.digest, artifacts=(artifact,)) + + def rules(): return ( *collect_rules(), *import_rules(), + *pex_venv_rules(), + *pex_from_targets_rules(), UnionRule(InferDependenciesRequest, InferPythonFaaSHandlerDependency), ) diff --git a/src/python/pants/backend/python/util_rules/faas_test.py b/src/python/pants/backend/python/util_rules/faas_test.py index 2df1d634685..ae00dcdf06c 100644 --- a/src/python/pants/backend/python/util_rules/faas_test.py +++ b/src/python/pants/backend/python/util_rules/faas_test.py @@ -66,7 +66,9 @@ def test_handler_filespec(handler: str, expected: List[str]) -> None: def test_resolve_handler(rule_runner: RuleRunner) -> None: - def assert_resolved(handler: str, *, expected: str, is_file: bool) -> None: + def assert_resolved( + handler: str, *, expected_module: str, expected_func: str, is_file: bool + ) -> None: addr = Address("src/python/project") rule_runner.write_files( {"src/python/project/lambda.py": "", "src/python/project/f2.py": ""} @@ -75,17 +77,26 @@ def assert_resolved(handler: str, *, expected: str, is_file: bool) -> None: result = rule_runner.request( ResolvedPythonFaaSHandler, [ResolvePythonFaaSHandlerRequest(field)] ) - assert result.val == expected + assert result.module == expected_module + assert result.func == expected_func assert result.file_name_used == is_file - assert_resolved("path.to.lambda:func", expected="path.to.lambda:func", is_file=False) - assert_resolved("lambda.py:func", 
expected="project.lambda:func", is_file=True) + assert_resolved( + "path.to.lambda:func", expected_module="path.to.lambda", expected_func="func", is_file=False + ) + assert_resolved( + "lambda.py:func", expected_module="project.lambda", expected_func="func", is_file=True + ) with engine_error(contains="Unmatched glob"): - assert_resolved("doesnt_exist.py:func", expected="doesnt matter", is_file=True) + assert_resolved( + "doesnt_exist.py:func", expected_module="doesnt matter", expected_func="", is_file=True + ) # Resolving >1 file is an error. with engine_error(InvalidFieldException): - assert_resolved("*.py:func", expected="doesnt matter", is_file=True) + assert_resolved( + "*.py:func", expected_module="doesnt matter", expected_func="", is_file=True + ) def test_infer_handler_dependency(rule_runner: RuleRunner, caplog) -> None: diff --git a/src/python/pants/backend/python/util_rules/pex_venv.py b/src/python/pants/backend/python/util_rules/pex_venv.py new file mode 100644 index 00000000000..faf0a27ce8c --- /dev/null +++ b/src/python/pants/backend/python/util_rules/pex_venv.py @@ -0,0 +1,92 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+
+from pants.backend.python.util_rules import pex_cli
+from pants.backend.python.util_rules.pex import CompletePlatforms, Pex, PexPlatforms
+from pants.backend.python.util_rules.pex_cli import PexCliProcess
+from pants.engine.fs import Digest
+from pants.engine.internals.native_engine import MergeDigests
+from pants.engine.process import ProcessResult
+from pants.engine.rules import Get, collect_rules, rule
+
+
+class PexVenvLayout(Enum):
+    """Layout names, passed as `--layout=<value>` to the `venv create` subcommand."""
+
+    VENV = "venv"
+    FLAT = "flat"
+    FLAT_ZIPPED = "flat-zipped"
+
+
+@dataclass(frozen=True)
+class PexVenvRequest:
+    """Inputs to the `pex_venv` rule."""
+
+    # Supplies the `--pex-repository` name; its digest is part of the process input.
+    pex: Pex
+    layout: PexVenvLayout
+    # Captured as a file for FLAT_ZIPPED (must end in `.zip`), as a directory otherwise.
+    output_path: Path
+    # Used as the description of the underlying process.
+    description: str
+
+    # Both are expanded into extra CLI arguments via `generate_pex_arg_list()`.
+    platforms: PexPlatforms = PexPlatforms()
+    complete_platforms: CompletePlatforms = CompletePlatforms()
+
+
+@dataclass(frozen=True)
+class PexVenv:
+    """Outcome of the `pex_venv` rule."""
+
+    # Output digest of the `venv create` process.
+    digest: Digest
+    # The `output_path` of the originating request, unchanged.
+    path: Path
+
+
+@rule
+async def pex_venv(request: PexVenvRequest) -> PexVenv:
+    """Run the `venv create` subcommand on `request.pex` and capture what it writes.
+
+    Raises:
+        ValueError: if the layout is FLAT_ZIPPED and `request.output_path` has no `.zip` suffix.
+    """
+    # TODO: create the output with a fixed name and then rename
+    # (https://github.com/pantsbuild/pants/issues/15102)
+    zipped = request.layout is PexVenvLayout.FLAT_ZIPPED
+
+    # --layout=flat-zipped takes --dest-dir=foo and zips it up to `foo.zip`, so we cannot
+    # directly control the full path until we do a rename
+    if zipped and request.output_path.suffix != ".zip":
+        raise ValueError(
+            f"layout=FLAT_ZIPPED requires output_path to end in '.zip', but found output_path='{request.output_path}' ending in {request.output_path.suffix!r}"
+        )
+
+    # A zipped layout yields a single file; every other layout yields a directory.
+    captured = str(request.output_path)
+    dest_dir = request.output_path.with_suffix("") if zipped else request.output_path
+    output_files = [captured] if zipped else []
+    output_directories = [] if zipped else [captured]
+
+    digests = [
+        request.pex.digest,
+        request.complete_platforms.digest,
+    ]
+    input_digest = await Get(Digest, MergeDigests(digests))
+
+    cli_args = (
+        f"--dest-dir={dest_dir}",
+        f"--pex-repository={request.pex.name}",
+        f"--layout={request.layout.value}",
+        # NB. Specifying more than one of these args doesn't make sense for `venv
+        # create`. Incorrect usage will be surfaced as a subprocess failure.
+        *request.platforms.generate_pex_arg_list(),
+        *request.complete_platforms.generate_pex_arg_list(),
+    )
+
+    result = await Get(
+        ProcessResult,
+        PexCliProcess(
+            subcommand=("venv", "create"),
+            extra_args=cli_args,
+            additional_input_digest=input_digest,
+            output_files=output_files,
+            output_directories=output_directories,
+            description=request.description,
+        ),
+    )
+
+    return PexVenv(digest=result.output_digest, path=request.output_path)
+
+
+def rules():
+    """Return this module's rules followed by the `pex_cli` rules."""
+    own_rules = collect_rules()
+    return [*own_rules, *pex_cli.rules()]
diff --git a/src/python/pants/backend/python/util_rules/pex_venv_test.py b/src/python/pants/backend/python/util_rules/pex_venv_test.py
new file mode 100644
index 00000000000..22dc529d896
--- /dev/null
+++ b/src/python/pants/backend/python/util_rules/pex_venv_test.py
@@ -0,0 +1,198 @@
+# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+
+from __future__ import annotations
+
+import fnmatch
+import io
+import zipfile
+from pathlib import Path
+
+import pytest
+
+from pants.backend.python.util_rules import pex_test_utils
+from pants.backend.python.util_rules.pex import CompletePlatforms, Pex, PexPlatforms
+from pants.backend.python.util_rules.pex import rules as pex_rules
+from pants.backend.python.util_rules.pex_requirements import PexRequirements
+from pants.backend.python.util_rules.pex_test_utils import create_pex_and_get_all_data
+from pants.backend.python.util_rules.pex_venv import PexVenv, PexVenvLayout, PexVenvRequest
+from pants.backend.python.util_rules.pex_venv import rules as pex_venv_rules
+from pants.engine.fs import CreateDigest, DigestContents, FileContent
+from pants.engine.internals.native_engine import Digest, Snapshot
+from pants.engine.internals.scheduler import ExecutionError
+from pants.testutil.rule_runner import QueryRule, RuleRunner
+
+
+@pytest.fixture
+def rule_runner() -> RuleRunner:
+    """RuleRunner with the pex test-util, pex and pex-venv rules plus the queries used below."""
+    query_rules = [
+        QueryRule(PexVenv, (PexVenvRequest,)),
+        QueryRule(Snapshot, (CreateDigest,)),
+    ]
+    return RuleRunner(
+        rules=[*pex_test_utils.rules(), *pex_rules(), *pex_venv_rules(), *query_rules],
+    )
+
+
+requirements = PexRequirements(["psycopg2-binary==2.9.6"])
+
+
+@pytest.fixture
+def sources(rule_runner: RuleRunner) -> Digest:
+    """Digest holding one empty first-party file, `first/party.py`."""
+    first_party = FileContent(path="first/party.py", content=b"")
+    return rule_runner.request(Digest, [CreateDigest([first_party])])
+
+
+@pytest.fixture
+def local_pex(rule_runner: RuleRunner, sources: Digest) -> Pex:
+    """PEX of `requirements` and `sources`, built without a `platforms` argument."""
+    built = create_pex_and_get_all_data(
+        rule_runner, requirements=requirements, sources=sources, internal_only=False
+    )
+    pex = built.pex
+    assert isinstance(pex, Pex)
+    return pex
+
+
+# at least one of these will be foreign
+WIN_311 = "win-amd64-cp-311-cp311"
+MAC_310 = "macosx_11_0-arm64-cp-310-cp310"
+
+# subset of the complete platforms for MAC_310
+MAC_310_CP = b"""{"path": "....", "compatible_tags": ["cp310-cp310-macosx_12_0_arm64", "cp310-cp310-macosx_12_0_universal2", "cp310-cp310-macosx_11_0_arm64", "py31-none-any", "py30-none-any"], "marker_environment": {"implementation_name": "cpython", "implementation_version": "3.10.10", "os_name": "posix", "platform_machine": "arm64", "platform_python_implementation": "CPython", "platform_release": "21.6.0", "platform_system": "Darwin", "platform_version": "Darwin Kernel Version 21.6.0: Wed Aug 10 14:28:35 PDT 2022; root:xnu-8020.141.5~2/RELEASE_ARM64_T8101", "python_full_version": "3.10.10", "python_version": "3.10", "sys_platform": "darwin"}}"""
+
+
+@pytest.fixture
+def foreign_pex(rule_runner: RuleRunner, sources: Digest) -> Pex:
+    """PEX of `requirements` and `sources`, built for the WIN_311 and MAC_310 platforms."""
+    target_platforms = PexPlatforms([WIN_311, MAC_310])
+    built = create_pex_and_get_all_data(
+        rule_runner,
+        requirements=requirements,
+        sources=sources,
+        platforms=target_platforms,
+        internal_only=False,
+    )
+    pex = built.pex
+    assert isinstance(pex, Pex)
+    return pex
+
+
+def run_and_validate(
+    rule_runner: RuleRunner, request: PexVenvRequest, check_globs_exist: tuple[str, ...]
+) -> PexVenv:
+    """Resolve `request`, then check the reported path and the produced files.
+
+    Each glob in `check_globs_exist` must match exactly one file of the resulting snapshot.
+    Returns the `PexVenv` so callers can inspect it further.
+    """
+    venv = rule_runner.request(PexVenv, [request])
+    assert venv.path == request.output_path
+
+    produced_files = rule_runner.request(Snapshot, [venv.digest]).files
+    for pattern in check_globs_exist:
+        matches = fnmatch.filter(produced_files, pattern)
+        assert len(matches) == 1, pattern
+
+    return venv
+
+
+@pytest.mark.parametrize(
+    ("layout", "expected_directory"),
+    [(PexVenvLayout.FLAT, ""), (PexVenvLayout.VENV, "lib/python*/site-packages/")],
+)
+def test_layout_venv_and_flat_should_give_plausible_output_for_local_platform(
+    layout: PexVenvLayout, expected_directory: str, local_pex: Pex, rule_runner: RuleRunner
+) -> None:
+    # Both the third-party package and the first-party source must land under the layout's root.
+    venv_request = PexVenvRequest(
+        pex=local_pex, layout=layout, output_path=Path("out/dir"), description="testing"
+    )
+    expected_globs = (
+        f"out/dir/{expected_directory}psycopg2/__init__.py",
+        f"out/dir/{expected_directory}first/party.py",
+    )
+    run_and_validate(rule_runner, venv_request, check_globs_exist=expected_globs)
+
+
+def test_layout_flat_zipped_should_give_plausible_output_for_local_platform(
+    local_pex: Pex, rule_runner: RuleRunner
+) -> None:
+    venv_request = PexVenvRequest(
+        pex=local_pex,
+        layout=PexVenvLayout.FLAT_ZIPPED,
+        output_path=Path("out/file.zip"),
+        description="testing",
+    )
+    venv = run_and_validate(rule_runner, venv_request, check_globs_exist=("out/file.zip",))
+
+    # The digest should hold only the zip; open it to check what was packed inside.
+    contents = rule_runner.request(DigestContents, [venv.digest])
+    assert len(contents) == 1
+    archive_bytes = io.BytesIO(contents[0].content)
+    with zipfile.ZipFile(archive_bytes) as archive:
+        names = set(archive.namelist())
+    assert "psycopg2/__init__.py" in names
+    assert "first/party.py" in names
+
+
+def test_layout_flat_zipped_should_require_zip_suffix(
+    local_pex: Pex, rule_runner: RuleRunner
+) -> None:
+    expected_error = "layout=FLAT_ZIPPED requires output_path to end in '\\.zip', but found output_path='out/file\\.other' ending in '\\.other'"
+    venv_request = PexVenvRequest(
+        pex=local_pex,
+        layout=PexVenvLayout.FLAT_ZIPPED,
+        output_path=Path("out/file.other"),
+        description="testing",
+    )
+    with pytest.raises(ExecutionError, match=expected_error):
+        run_and_validate(rule_runner, venv_request, check_globs_exist=())
+
+
+def test_platforms_should_choose_appropriate_dependencies_when_possible(
+    foreign_pex: Pex, rule_runner: RuleRunner
+) -> None:
+    # smoke test that platforms are passed through in the right way
+    venv_request = PexVenvRequest(
+        pex=foreign_pex,
+        layout=PexVenvLayout.FLAT,
+        output_path=Path("out"),
+        platforms=PexPlatforms([WIN_311]),
+        description="testing",
+    )
+    expected_globs = (
+        "out/first/party.py",
+        "out/psycopg2/__init__.py",
+    )
+    run_and_validate(rule_runner, venv_request, check_globs_exist=expected_globs)
+
+
+def test_complete_platforms_should_choose_appropriate_dependencies_when_possible(
+    foreign_pex: Pex,
+    rule_runner: RuleRunner,
+) -> None:
+    # smoke test that complete platforms are passed through in the right way
+    cp_snapshot = rule_runner.request(
+        Snapshot, [CreateDigest([FileContent("cp", content=MAC_310_CP)])]
+    )
+
+    venv_request = PexVenvRequest(
+        pex=foreign_pex,
+        layout=PexVenvLayout.FLAT,
+        output_path=Path("out"),
+        complete_platforms=CompletePlatforms.from_snapshot(cp_snapshot),
+        description="testing",
+    )
+    expected_globs = (
+        "out/first/party.py",
+        "out/psycopg2/__init__.py",
+    )
+    run_and_validate(rule_runner, venv_request, check_globs_exist=expected_globs)