From d3d325777952435186be42443fb28fde6771fae7 Mon Sep 17 00:00:00 2001 From: Joshua Cannon Date: Tue, 14 Mar 2023 14:58:26 -0500 Subject: [PATCH] Always provide Python-for-Pants-scripts (Cherry-pick of #18433) (#18495) This PR decouples the Python Pants uses for its own nefarious purposes (like running PEX or `gunzip`) from the user search paths by either using `sys.executable` locally or downloading and using Python Build Standalone in a Docker environment. Additionally, when making this change, the Python/pex code was refactored so that we always use this Python to run pex, with Python either being chosen by pex or selected via the `PEX_PYTHON` env var at runtime. I think this is a nice cleanup of the handshake that `CompletePexEnvironment.create_argv` and `CompletePexEnvironment.environment_dict` used to have. On a side note, this change (together with `scie-pants` and https://github.com/pantsbuild/pants/pull/18352) marks a complete divorce between any Python on the user system (or lack thereof) and running Python code. 
--- pants.toml | 2 + .../pants/backend/python/goals/export.py | 3 +- src/python/pants/backend/python/goals/repl.py | 12 +- .../pants/backend/python/goals/run_helper.py | 12 +- .../python/goals/run_python_requirement.py | 2 +- .../providers/pyenv/rules_integration_test.py | 3 +- .../backend/python/util_rules/local_dists.py | 2 +- .../python/util_rules/local_dists_test.py | 15 +- .../pants/backend/python/util_rules/pex.py | 11 +- .../backend/python/util_rules/pex_cli.py | 31 ++-- .../python/util_rules/pex_environment.py | 62 +++----- src/python/pants/core/register.py | 2 + .../pants/core/subsystems/python_bootstrap.py | 48 +++++- .../pants/core/util_rules/adhoc_binaries.py | 139 ++++++++++++++++++ .../core/util_rules/adhoc_binaries_test.py | 78 ++++++++++ src/python/pants/core/util_rules/archive.py | 10 +- .../pants/jvm/resolve/coursier_setup.py | 10 +- .../pants/testutil/pants_integration_test.py | 9 +- src/python/pants/testutil/rule_runner.py | 2 + 19 files changed, 359 insertions(+), 94 deletions(-) create mode 100644 src/python/pants/core/util_rules/adhoc_binaries.py create mode 100644 src/python/pants/core/util_rules/adhoc_binaries_test.py diff --git a/pants.toml b/pants.toml index e08a96243bd..c10b65c0c00 100644 --- a/pants.toml +++ b/pants.toml @@ -137,6 +137,8 @@ venv_use_symlinks = true # `python_distrobution` targets, currently: # + src/python/pants:pants-packaged # + src/python/pants/testutil:testutil_wheel +# And update the PythonBuildStandalone version/URL: +# + src/python/pants/core/subsystems/python_bootstrap.py interpreter_constraints = [">=3.7,<3.10"] macos_big_sur_compatibility = true enable_resolves = true diff --git a/src/python/pants/backend/python/goals/export.py b/src/python/pants/backend/python/goals/export.py index ff7dec80b3b..96fca3226f5 100644 --- a/src/python/pants/backend/python/goals/export.py +++ b/src/python/pants/backend/python/goals/export.py @@ -233,10 +233,9 @@ async def do_export( "--collisions-ok", output_path, ], - 
python=requirements_pex.python, ), { - **complete_pex_env.environment_dict(python_configured=True), + **complete_pex_env.environment_dict(python=requirements_pex.python), "PEX_MODULE": "pex.tools", }, ), diff --git a/src/python/pants/backend/python/goals/repl.py b/src/python/pants/backend/python/goals/repl.py index 095b71572e5..0d3c6c4d50d 100644 --- a/src/python/pants/backend/python/goals/repl.py +++ b/src/python/pants/backend/python/goals/repl.py @@ -109,13 +109,11 @@ async def create_python_repl_request( ) complete_pex_env = pex_env.in_workspace() - args = complete_pex_env.create_argv( - request.in_chroot(requirements_pex.name), python=requirements_pex.python - ) + args = complete_pex_env.create_argv(request.in_chroot(requirements_pex.name)) chrooted_source_roots = [request.in_chroot(sr) for sr in sources.source_roots] extra_env = { - **complete_pex_env.environment_dict(python_configured=requirements_pex.python is not None), + **complete_pex_env.environment_dict(python=requirements_pex.python), "PEX_EXTRA_SYS_PATH": ":".join(chrooted_source_roots), "PEX_PATH": request.in_chroot(local_dists.pex.name), "PEX_INTERPRETER_HISTORY": "1" if python_setup.repl_history else "0", @@ -175,15 +173,13 @@ async def create_ipython_repl_request( ) complete_pex_env = pex_env.in_workspace() - args = list( - complete_pex_env.create_argv(request.in_chroot(ipython_pex.name), python=ipython_pex.python) - ) + args = list(complete_pex_env.create_argv(request.in_chroot(ipython_pex.name))) if ipython.ignore_cwd: args.append("--ignore-cwd") chrooted_source_roots = [request.in_chroot(sr) for sr in sources.source_roots] extra_env = { - **complete_pex_env.environment_dict(python_configured=ipython_pex.python is not None), + **complete_pex_env.environment_dict(python=ipython_pex.python), "PEX_PATH": os.pathsep.join( [ request.in_chroot(requirements_pex.name), diff --git a/src/python/pants/backend/python/goals/run_helper.py b/src/python/pants/backend/python/goals/run_helper.py index 
f9c44f24f23..32da4a4a1bb 100644 --- a/src/python/pants/backend/python/goals/run_helper.py +++ b/src/python/pants/backend/python/goals/run_helper.py @@ -14,8 +14,9 @@ ResolvedPexEntryPoint, ResolvePexEntryPointRequest, ) +from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints from pants.backend.python.util_rules.pex import Pex, PexRequest, VenvPex, VenvPexRequest -from pants.backend.python.util_rules.pex_environment import PexEnvironment +from pants.backend.python.util_rules.pex_environment import PexEnvironment, PythonExecutable from pants.backend.python.util_rules.pex_from_targets import PexFromTargetsRequest from pants.backend.python.util_rules.python_sources import ( PythonSourceFiles, @@ -89,7 +90,10 @@ async def _create_python_source_run_request( complete_pex_environment = pex_env.in_sandbox(working_directory=None) else: complete_pex_environment = pex_env.in_workspace() - venv_pex = await Get(VenvPex, VenvPexRequest(pex_request, complete_pex_environment)) + venv_pex, python = await MultiGet( + Get(VenvPex, VenvPexRequest(pex_request, complete_pex_environment)), + Get(PythonExecutable, InterpreterConstraints, pex_request.interpreter_constraints), + ) input_digests = [ venv_pex.digest, # Note regarding not-in-sandbox mode: You might think that the sources don't need to be copied @@ -110,7 +114,7 @@ async def _create_python_source_run_request( *chrooted_source_roots, ] extra_env = { - **complete_pex_environment.environment_dict(python_configured=venv_pex.python is not None), + **complete_pex_environment.environment_dict(python=python), "PEX_EXTRA_SYS_PATH": os.pathsep.join(source_roots), } append_only_caches = ( @@ -125,6 +129,7 @@ async def _create_python_source_run_request( **complete_pex_environment.append_only_caches, **append_only_caches, }, + immutable_input_digests=complete_pex_environment.immutable_input_digests, ) @@ -202,4 +207,5 @@ def patched_resolve_remote_root(self, local_root, remote_root): args=args, 
extra_env=extra_env, append_only_caches=regular_run_request.append_only_caches, + immutable_input_digests=regular_run_request.immutable_input_digests, ) diff --git a/src/python/pants/backend/python/goals/run_python_requirement.py b/src/python/pants/backend/python/goals/run_python_requirement.py index bec0dda7e82..1a06996b3fb 100644 --- a/src/python/pants/backend/python/goals/run_python_requirement.py +++ b/src/python/pants/backend/python/goals/run_python_requirement.py @@ -160,7 +160,7 @@ async def create_python_requirement_run_request( input_digest = venv_pex.digest extra_env = { - **complete_pex_environment.environment_dict(python_configured=venv_pex.python is not None), + **complete_pex_environment.environment_dict(python=None), } return RunRequest( diff --git a/src/python/pants/backend/python/providers/pyenv/rules_integration_test.py b/src/python/pants/backend/python/providers/pyenv/rules_integration_test.py index 49f7777f8ba..3182e05ab2b 100644 --- a/src/python/pants/backend/python/providers/pyenv/rules_integration_test.py +++ b/src/python/pants/backend/python/providers/pyenv/rules_integration_test.py @@ -37,6 +37,7 @@ def rule_runner() -> RuleRunner: PythonSourcesGeneratorTarget, PyenvInstall, ], + preserve_tmpdirs=True, ) @@ -113,7 +114,7 @@ def test_venv_pex_reconstruction(rule_runner): print(venv_location) """ ), - "src/BUILD": "python_sources()", + "src/BUILD": "python_sources(interpreter_constraints=['==3.9.*'])", } ) diff --git a/src/python/pants/backend/python/util_rules/local_dists.py b/src/python/pants/backend/python/util_rules/local_dists.py index 57908195abd..81d2ffa1591 100644 --- a/src/python/pants/backend/python/util_rules/local_dists.py +++ b/src/python/pants/backend/python/util_rules/local_dists.py @@ -117,7 +117,7 @@ def __init__( addresses: Iterable[Address], *, internal_only: bool, - interpreter_constraints: InterpreterConstraints = InterpreterConstraints(), + interpreter_constraints: InterpreterConstraints, sources: PythonSourceFiles = 
PythonSourceFiles.empty(), ) -> None: object.__setattr__(self, "addresses", Addresses(addresses)) diff --git a/src/python/pants/backend/python/util_rules/local_dists_test.py b/src/python/pants/backend/python/util_rules/local_dists_test.py index 9535e88c0e0..386f44b73bb 100644 --- a/src/python/pants/backend/python/util_rules/local_dists_test.py +++ b/src/python/pants/backend/python/util_rules/local_dists_test.py @@ -15,8 +15,10 @@ from pants.backend.python.macros.python_artifact import PythonArtifact from pants.backend.python.subsystems.setuptools import rules as setuptools_rules from pants.backend.python.target_types import PythonDistribution, PythonSourcesGeneratorTarget -from pants.backend.python.util_rules import local_dists +from pants.backend.python.util_rules import local_dists, pex_from_targets +from pants.backend.python.util_rules.interpreter_constraints import InterpreterConstraints from pants.backend.python.util_rules.local_dists import LocalDistsPex, LocalDistsPexRequest +from pants.backend.python.util_rules.pex_from_targets import InterpreterConstraintsRequest from pants.backend.python.util_rules.python_sources import PythonSourceFiles from pants.build_graph.address import Address from pants.core.util_rules.source_files import SourceFiles @@ -32,6 +34,8 @@ def rule_runner() -> RuleRunner: *setup_py_rules(), *setuptools_rules(), *target_types_rules.rules(), + *pex_from_targets.rules(), + QueryRule(InterpreterConstraints, (InterpreterConstraintsRequest,)), QueryRule(LocalDistsPex, (LocalDistsPexRequest,)), ], target_types=[PythonSourcesGeneratorTarget, PythonDistribution], @@ -79,8 +83,15 @@ def test_build_local_dists(rule_runner: RuleRunner) -> None: ) sources_snapshot = rule_runner.request(Snapshot, [sources_digest]) sources = PythonSourceFiles(SourceFiles(sources_snapshot, tuple()), ("srcroot",)) + addresses = [Address("foo", target_name="dist")] + interpreter_constraints = rule_runner.request( + InterpreterConstraints, 
[InterpreterConstraintsRequest(addresses)] + ) request = LocalDistsPexRequest( - [Address("foo", target_name="dist")], internal_only=True, sources=sources + addresses, + internal_only=True, + sources=sources, + interpreter_constraints=interpreter_constraints, ) result = rule_runner.request(LocalDistsPex, [request]) diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py index 85d551ea25b..49597fd7351 100644 --- a/src/python/pants/backend/python/util_rules/pex.py +++ b/src/python/pants/backend/python/util_rules/pex.py @@ -622,7 +622,6 @@ async def build_pex( result = await Get( ProcessResult, PexCliProcess( - python=pex_python_setup.python, subcommand=(), extra_args=argv, additional_input_digest=merged_digest, @@ -751,7 +750,7 @@ def _create_venv_script( env_vars = ( f"{name}={shlex.quote(value)}" for name, value in self.complete_pex_env.environment_dict( - python_configured=True + python=self.pex.python ).items() ) @@ -759,7 +758,7 @@ def _create_venv_script( venv_dir = shlex.quote(str(self.venv_dir)) execute_pex_args = " ".join( f"$(adjust_relative_paths {shlex.quote(arg)})" - for arg in self.complete_pex_env.create_argv(self.pex.name, python=self.pex.python) + for arg in self.complete_pex_env.create_argv(self.pex.name) ) script = dedent( @@ -1034,9 +1033,9 @@ def __init__( async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment) -> Process: pex = request.pex complete_pex_env = pex_environment.in_sandbox(working_directory=request.working_directory) - argv = complete_pex_env.create_argv(pex.name, *request.argv, python=pex.python) + argv = complete_pex_env.create_argv(pex.name, *request.argv) env = { - **complete_pex_env.environment_dict(python_configured=pex.python is not None), + **complete_pex_env.environment_dict(python=pex.python), **request.extra_env, } input_digest = ( @@ -1060,6 +1059,7 @@ async def setup_pex_process(request: PexProcess, pex_environment: PexEnvironment 
**complete_pex_env.append_only_caches, **append_only_caches, }, + immutable_input_digests=pex_environment.bootstrap_python.immutable_input_digests, timeout_seconds=request.timeout_seconds, execution_slot_variable=request.execution_slot_variable, concurrency_available=request.concurrency_available, @@ -1153,6 +1153,7 @@ async def setup_venv_pex_process( output_files=request.output_files, output_directories=request.output_directories, append_only_caches=append_only_caches, + immutable_input_digests=pex_environment.bootstrap_python.immutable_input_digests, timeout_seconds=request.timeout_seconds, execution_slot_variable=request.execution_slot_variable, concurrency_available=request.concurrency_available, diff --git a/src/python/pants/backend/python/util_rules/pex_cli.py b/src/python/pants/backend/python/util_rules/pex_cli.py index fab9e9930e2..29a13930782 100644 --- a/src/python/pants/backend/python/util_rules/pex_cli.py +++ b/src/python/pants/backend/python/util_rules/pex_cli.py @@ -15,7 +15,8 @@ PexSubsystem, PythonExecutable, ) -from pants.core.util_rules import external_tool +from pants.core.util_rules import adhoc_binaries, external_tool +from pants.core.util_rules.adhoc_binaries import PythonBuildStandaloneBinary from pants.core.util_rules.external_tool import ( DownloadedExternalTool, ExternalToolRequest, @@ -61,7 +62,6 @@ def default_known_versions(cls): class PexCliProcess: subcommand: tuple[str, ...] extra_args: tuple[str, ...] 
- set_resolve_args: bool description: str = dataclasses.field(compare=False) additional_input_digest: Optional[Digest] extra_env: Optional[FrozenDict[str, str]] @@ -78,7 +78,6 @@ def __init__( subcommand: Iterable[str], extra_args: Iterable[str], description: str, - set_resolve_args: bool = True, additional_input_digest: Optional[Digest] = None, extra_env: Optional[Mapping[str, str]] = None, output_files: Optional[Iterable[str]] = None, @@ -90,7 +89,6 @@ def __init__( ) -> None: object.__setattr__(self, "subcommand", tuple(subcommand)) object.__setattr__(self, "extra_args", tuple(extra_args)) - object.__setattr__(self, "set_resolve_args", set_resolve_args) object.__setattr__(self, "description", description) object.__setattr__(self, "additional_input_digest", additional_input_digest) object.__setattr__(self, "extra_env", FrozenDict(extra_env) if extra_env else None) @@ -125,6 +123,7 @@ async def setup_pex_cli_process( request: PexCliProcess, pex_pex: PexPEX, pex_env: PexEnvironment, + bootstrap_python: PythonBuildStandaloneBinary, python_native_code: PythonNativeCodeSubsystem.EnvironmentAware, global_options: GlobalOptions, pex_subsystem: PexSubsystem, @@ -164,11 +163,13 @@ async def setup_pex_cli_process( verbosity_args = [f"-{'v' * pex_subsystem.verbosity}"] if pex_subsystem.verbosity > 0 else [] - resolve_args = ( - [*cert_args, "--python-path", create_path_env_var(pex_env.interpreter_search_paths)] - if request.set_resolve_args - else [] - ) + # NB: We should always pass `--python-path`, as that tells Pex where to look for interpreters + # when `--python` isn't an absolute path. + resolve_args = [ + *cert_args, + "--python-path", + create_path_env_var(pex_env.interpreter_search_paths), + ] # All old-style pex runs take the --pip-version flag, but only certain subcommands of the # `pex3` console script do. So if invoked with a subcommand, the caller must selectively # set --pip-version only on subcommands that take it. 
@@ -187,15 +188,14 @@ async def setup_pex_cli_process( ] complete_pex_env = pex_env.in_sandbox(working_directory=None) - normalized_argv = complete_pex_env.create_argv(pex_pex.exe, *args, python=request.python) + normalized_argv = complete_pex_env.create_argv(pex_pex.exe, *args) env = { - **complete_pex_env.environment_dict(python_configured=request.python is not None), + **complete_pex_env.environment_dict(python=request.python), **python_native_code.subprocess_env_vars, **(request.extra_env or {}), # If a subcommand is used, we need to use the `pex3` console script. **({"PEX_SCRIPT": "pex3"} if request.subcommand else {}), } - append_only_caches = request.python.append_only_caches if request.python else FrozenDict({}) return Process( normalized_argv, @@ -204,10 +204,8 @@ async def setup_pex_cli_process( env=env, output_files=request.output_files, output_directories=request.output_directories, - append_only_caches={ - **complete_pex_env.append_only_caches, - **append_only_caches, - }, + append_only_caches=complete_pex_env.append_only_caches, + immutable_input_digests=bootstrap_python.immutable_input_digests, level=request.level, concurrency_available=request.concurrency_available, cache_scope=request.cache_scope, @@ -219,4 +217,5 @@ def rules(): *collect_rules(), *external_tool.rules(), *pex_environment.rules(), + *adhoc_binaries.rules(), ] diff --git a/src/python/pants/backend/python/util_rules/pex_environment.py b/src/python/pants/backend/python/util_rules/pex_environment.py index 5db43d2c193..d5c22c63f76 100644 --- a/src/python/pants/backend/python/util_rules/pex_environment.py +++ b/src/python/pants/backend/python/util_rules/pex_environment.py @@ -10,9 +10,11 @@ from pants.core.subsystems.python_bootstrap import PythonBootstrap from pants.core.util_rules import subprocess_environment, system_binaries +from pants.core.util_rules.adhoc_binaries import PythonBuildStandaloneBinary from pants.core.util_rules.subprocess_environment import SubprocessEnvironmentVars 
-from pants.core.util_rules.system_binaries import BinaryPath, PythonBinary +from pants.core.util_rules.system_binaries import BinaryPath from pants.engine.engine_aware import EngineAwareReturnType +from pants.engine.internals.native_engine import Digest from pants.engine.rules import collect_rules, rule from pants.option.global_options import NamedCachesDirOption from pants.option.option_types import BoolOption, IntOption, StrListOption @@ -91,17 +93,20 @@ def verbosity(self) -> int: @dataclass(frozen=True) class PythonExecutable(BinaryPath, EngineAwareReturnType): - """The BinaryPath of a Python executable, along with some extras.""" + """The BinaryPath of a Python executable for user code, along with some extras.""" append_only_caches: FrozenDict[str, str] = FrozenDict({}) + immutable_input_digests: FrozenDict[str, str] = FrozenDict({}) def __init__( self, path: str, fingerprint: str | None = None, append_only_caches: Mapping[str, str] = FrozenDict({}), + immutable_input_digests: Mapping[str, str] = FrozenDict({}), ) -> None: object.__setattr__(self, "append_only_caches", FrozenDict(append_only_caches)) + object.__setattr__(self, "immutable_input_digests", FrozenDict(immutable_input_digests)) super().__init__(path, fingerprint) self.__post_init__() @@ -124,42 +129,18 @@ def __post_init__(self) -> None: def message(self) -> str: return f"Selected {self.path} to run PEXes with." - @classmethod - def from_python_binary(cls, python_binary: PythonBinary) -> PythonExecutable: - """Converts from PythonBinary to PythonExecutable. - - The PythonBinary type is a singleton representing the Python that is used for script - execution by `@rule`s. On the other hand, there may be multiple PythonExecutables, since - they are subject to a user's interpreter constraints. - """ - return cls(path=python_binary.path, fingerprint=python_binary.fingerprint) - @dataclass(frozen=True) -class PexEnvironment(EngineAwareReturnType): +class PexEnvironment: path: tuple[str, ...] 
interpreter_search_paths: tuple[str, ...] subprocess_environment_dict: FrozenDict[str, str] named_caches_dir: PurePath - bootstrap_python: PythonExecutable | None = None + bootstrap_python: PythonBuildStandaloneBinary venv_use_symlinks: bool = False _PEX_ROOT_DIRNAME = "pex_root" - def level(self) -> LogLevel: - return LogLevel.DEBUG if self.bootstrap_python else LogLevel.WARN - - def message(self) -> str: - if not self.bootstrap_python: - return softwrap( - """ - No bootstrap Python executable could be found from the option - `interpreter_search_paths` in the `[python]` scope. Will attempt to run - PEXes directly. - """ - ) - return f"Selected {self.bootstrap_python.path} to bootstrap PEXes with." - def in_sandbox(self, *, working_directory: str | None) -> CompletePexEnvironment: pex_root = PurePath(".cache") / self._PEX_ROOT_DIRNAME return CompletePexEnvironment( @@ -167,6 +148,7 @@ def in_sandbox(self, *, working_directory: str | None) -> CompletePexEnvironment pex_root=pex_root, _working_directory=PurePath(working_directory) if working_directory else None, append_only_caches=FrozenDict({self._PEX_ROOT_DIRNAME: str(pex_root)}), + immutable_input_digests=self.bootstrap_python.immutable_input_digests, ) def in_workspace(self) -> CompletePexEnvironment: @@ -181,6 +163,7 @@ def in_workspace(self) -> CompletePexEnvironment: pex_root=pex_root, _working_directory=None, append_only_caches=FrozenDict(), + immutable_input_digests=self.bootstrap_python.immutable_input_digests, ) def venv_site_packages_copies_option(self, use_copies: bool) -> str: @@ -192,7 +175,7 @@ def venv_site_packages_copies_option(self, use_copies: bool) -> str: @rule(desc="Prepare environment for running PEXes", level=LogLevel.DEBUG) async def find_pex_python( python_bootstrap: PythonBootstrap, - python_binary: PythonBinary, + python_binary: PythonBuildStandaloneBinary, pex_subsystem: PexSubsystem, pex_environment_aware: PexSubsystem.EnvironmentAware, subprocess_env_vars: SubprocessEnvironmentVars, 
@@ -203,7 +186,7 @@ async def find_pex_python( interpreter_search_paths=python_bootstrap.interpreter_search_paths, subprocess_environment_dict=subprocess_env_vars.vars, named_caches_dir=named_caches_dir.val, - bootstrap_python=PythonExecutable.from_python_binary(python_binary), + bootstrap_python=python_binary, venv_use_symlinks=pex_subsystem.venv_use_symlinks, ) @@ -214,8 +197,7 @@ class CompletePexEnvironment: pex_root: PurePath _working_directory: PurePath | None append_only_caches: FrozenDict[str, str] - - _PEX_ROOT_DIRNAME = "pex_root" + immutable_input_digests: FrozenDict[str, Digest] @property def interpreter_search_paths(self) -> tuple[str, ...]: @@ -229,14 +211,9 @@ def create_argv( if self._working_directory else pex_filepath ) - python = python or self._pex_environment.bootstrap_python - if python: - return (python.path, pex_relpath, *args) - if os.path.basename(pex_relpath) == pex_relpath: - return (f"./{pex_relpath}", *args) - return (pex_relpath, *args) + return (self._pex_environment.bootstrap_python.path, pex_relpath, *args) - def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]: + def environment_dict(self, *, python: PythonExecutable | None = None) -> Mapping[str, str]: """The environment to use for running anything with PEX. If the Process is run with a pre-selected Python interpreter, set `python_configured=True` @@ -252,10 +229,9 @@ def environment_dict(self, *, python_configured: bool) -> Mapping[str, str]: ), **self._pex_environment.subprocess_environment_dict, ) - # NB: We only set `PEX_PYTHON_PATH` if the Python interpreter has not already been - # pre-selected by Pants. Otherwise, Pex would inadvertently try to find another interpreter - # when running PEXes. (Creating a PEX will ignore this env var in favor of `--python-path`.) 
- if not python_configured: + if python: + d["PEX_PYTHON"] = python.path + else: d["PEX_PYTHON_PATH"] = create_path_env_var(self.interpreter_search_paths) return d diff --git a/src/python/pants/core/register.py b/src/python/pants/core/register.py index ac5554a1114..e35c629c5ba 100644 --- a/src/python/pants/core/register.py +++ b/src/python/pants/core/register.py @@ -40,6 +40,7 @@ ) from pants.core.target_types import rules as target_type_rules from pants.core.util_rules import ( + adhoc_binaries, archive, config_files, external_tool, @@ -82,6 +83,7 @@ def rules(): *test.rules(), *bsp_rules(), # util_rules + *adhoc_binaries.rules(), *anonymous_telemetry.rules(), *archive.rules(), *config_files.rules(), diff --git a/src/python/pants/core/subsystems/python_bootstrap.py b/src/python/pants/core/subsystems/python_bootstrap.py index 71578cbfe48..c7d1c7bd450 100644 --- a/src/python/pants/core/subsystems/python_bootstrap.py +++ b/src/python/pants/core/subsystems/python_bootstrap.py @@ -17,18 +17,20 @@ from pants.core.util_rules.environments import EnvironmentTarget, LocalEnvironmentTarget from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest from pants.engine.rules import Get, _uncacheable_rule, collect_rules, rule -from pants.option.option_types import StrListOption +from pants.option.option_types import DictOption, StrListOption from pants.option.subsystem import Subsystem from pants.util.strutil import help_text, softwrap logger = logging.getLogger(__name__) +_PBS_URL_TEMPLATE = "https://github.com/indygreg/python-build-standalone/releases/download/20230116/cpython-3.9.16+20230116-{}-install_only.tar.gz" + class PythonBootstrapSubsystem(Subsystem): options_scope = "python-bootstrap" help = help_text( """ - Options used to locate Python interpreters used by all Pants backends. 
+ Options used to locate Python interpreters This subsystem controls where and how Pants will locate Python, but beyond that it does not control which Python interpreter versions are actually used for your code: see the @@ -36,6 +38,48 @@ class PythonBootstrapSubsystem(Subsystem): """ ) + internal_python_build_standalone_info = DictOption( + default={ + "linux_arm64": ( + _PBS_URL_TEMPLATE.format("aarch64-unknown-linux-gnu"), + "1ba520c0db431c84305677f56eb9a4254f5097430ed443e92fc8617f8fba973d", + 23873387, + ), + "linux_x86_64": ( + _PBS_URL_TEMPLATE.format("x86_64-unknown-linux-gnu"), + "7ba397787932393e65fc2fb9fcfabf54f2bb6751d5da2b45913cb25b2d493758", + 26129729, + ), + "macos_arm64": ( + _PBS_URL_TEMPLATE.format("aarch64-apple-darwin"), + "d732d212d42315ac27c6da3e0b69636737a8d72086c980daf844344c010cab80", + 17084463, + ), + "macos_x86_64": ( + _PBS_URL_TEMPLATE.format("x86_64-apple-darwin"), + "3948384af5e8d4ee7e5ccc648322b99c1c5cf4979954ed5e6b3382c69d6db71e", + 17059474, + ), + }, + help=softwrap( + """ + A map from platform to the information needed to download Python Build Standalone. + + Python Build Standalone is used to run Python-implemented Pants tools/scripts in + docker environments (so that Python doesn't need to be installed). + + The version of Python provided should match the default value's version, which is + the highest Python Major/Minor version compatible with the Pants package's + interpreter constraints. Additionally, the downloaded file should be extractable by + `tar` using `-xvf` (most likely a `.tar.gz` file). + + The schema is : (, , ) + for each possible platform. 
+ """ + ), + advanced=True, + ) + class EnvironmentAware(Subsystem.EnvironmentAware): search_path = StrListOption( default=["", ""], diff --git a/src/python/pants/core/util_rules/adhoc_binaries.py b/src/python/pants/core/util_rules/adhoc_binaries.py new file mode 100644 index 00000000000..a17372d1a58 --- /dev/null +++ b/src/python/pants/core/util_rules/adhoc_binaries.py @@ -0,0 +1,139 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import annotations + +import os +import sys +from dataclasses import dataclass +from textwrap import dedent # noqa: PNT20 + +from pants.core.subsystems.python_bootstrap import PythonBootstrapSubsystem # noqa: PNT20 +from pants.core.util_rules.environments import EnvironmentTarget, LocalEnvironmentTarget +from pants.core.util_rules.system_binaries import SEARCH_PATHS, TarBinary +from pants.engine.fs import DownloadFile +from pants.engine.internals.native_engine import EMPTY_DIGEST, Digest, FileDigest +from pants.engine.internals.selectors import Get +from pants.engine.platform import Platform +from pants.engine.process import Process, ProcessResult +from pants.engine.rules import collect_rules, rule +from pants.util.frozendict import FrozenDict +from pants.util.logging import LogLevel + + +@dataclass(frozen=True) +class PythonBuildStandaloneBinary: + """A Python interpreter for use by `@rule` code as an alternative to BashBinary scripts. + + This interpreter is provided by Python Build Standalone https://gregoryszorc.com/docs/python-build-standalone/main/, + which has a few caveats. Namely it doesn't play nicely with third-party sdists. Meaning Pants' + scripts being run by Python Build Standalone should avoid third-party sdists. 
+ """ + + SYMLINK_DIRNAME = ".python-build-standalone" + + path: str + _digest: Digest + + @property + def immutable_input_digests(self) -> FrozenDict[str, Digest]: + return FrozenDict({PythonBuildStandaloneBinary.SYMLINK_DIRNAME: self._digest}) + + +# NB: These private types are solely so we can test the docker-path using the local +# environment. +class _PythonBuildStandaloneBinary(PythonBuildStandaloneBinary): + pass + + +class _DownloadPythonBuildStandaloneBinaryRequest: + pass + + +@rule +async def get_python_for_scripts(env_tgt: EnvironmentTarget) -> PythonBuildStandaloneBinary: + if env_tgt.val is None or isinstance(env_tgt.val, LocalEnvironmentTarget): + return PythonBuildStandaloneBinary(sys.executable, EMPTY_DIGEST) + + result = await Get(_PythonBuildStandaloneBinary, _DownloadPythonBuildStandaloneBinaryRequest()) + + return PythonBuildStandaloneBinary(result.path, result._digest) + + +@rule(desc="Downloading Python for scripts", level=LogLevel.TRACE) +async def download_python_binary( + _: _DownloadPythonBuildStandaloneBinaryRequest, + platform: Platform, + tar_binary: TarBinary, + python_bootstrap: PythonBootstrapSubsystem, +) -> _PythonBuildStandaloneBinary: + url, fingerprint, bytelen = python_bootstrap.internal_python_build_standalone_info[ + platform.value + ] + + filename = url.rsplit("/", 1)[-1] + python_archive = await Get( + Digest, + DownloadFile( + url, + FileDigest( + fingerprint=fingerprint, + serialized_bytes_length=bytelen, + ), + ), + ) + + result = await Get( + ProcessResult, + Process( + argv=[tar_binary.path, "-xvf", filename], + input_digest=python_archive, + env={"PATH": os.pathsep.join(SEARCH_PATHS)}, + description="Extract Python", + level=LogLevel.DEBUG, + output_directories=("python",), + ), + ) + + return _PythonBuildStandaloneBinary( + f"{PythonBuildStandaloneBinary.SYMLINK_DIRNAME}/python/bin/python3", result.output_digest + ) + + +@dataclass(frozen=True) +class GunzipBinaryRequest: + pass + + +@dataclass(frozen=True) +class 
GunzipBinary: + python_binary: PythonBuildStandaloneBinary + + def extract_archive_argv(self, archive_path: str, extract_path: str) -> tuple[str, ...]: + archive_name = os.path.basename(archive_path) + dest_file_name = os.path.splitext(archive_name)[0] + dest_path = os.path.join(extract_path, dest_file_name) + script = dedent( + f""" + import gzip + import shutil + with gzip.GzipFile(filename={archive_path!r}, mode="rb") as source: + with open({dest_path!r}, "wb") as dest: + shutil.copyfileobj(source, dest) + """ + ) + return (self.python_binary.path, "-c", script) + + +@rule +def find_gunzip(python_binary: PythonBuildStandaloneBinary) -> GunzipBinary: + return GunzipBinary(python_binary) + + +@rule +async def find_gunzip_wrapper(_: GunzipBinaryRequest, gunzip: GunzipBinary) -> GunzipBinary: + return gunzip + + +def rules(): + return collect_rules() diff --git a/src/python/pants/core/util_rules/adhoc_binaries_test.py b/src/python/pants/core/util_rules/adhoc_binaries_test.py new file mode 100644 index 00000000000..e9655aba9de --- /dev/null +++ b/src/python/pants/core/util_rules/adhoc_binaries_test.py @@ -0,0 +1,78 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). 
+ +import sys + +import pytest + +from pants.build_graph.address import Address +from pants.core.target_types import FileTarget +from pants.core.util_rules import adhoc_binaries +from pants.core.util_rules.adhoc_binaries import ( + PythonBuildStandaloneBinary, + _DownloadPythonBuildStandaloneBinaryRequest, + _PythonBuildStandaloneBinary, +) +from pants.core.util_rules.environments import EnvironmentTarget, LocalEnvironmentTarget +from pants.engine.internals.native_engine import EMPTY_DIGEST +from pants.testutil.rule_runner import MockGet, QueryRule, RuleRunner, run_rule_with_mocks + + +@pytest.fixture +def rule_runner() -> RuleRunner: + return RuleRunner( + rules=[ + *adhoc_binaries.rules(), + QueryRule( + _PythonBuildStandaloneBinary, + [_DownloadPythonBuildStandaloneBinaryRequest], + ), + ], + target_types=[LocalEnvironmentTarget, FileTarget], + ) + + +@pytest.mark.parametrize("env_tgt", [None, LocalEnvironmentTarget({}, address=Address(""))]) +def test_local(env_tgt) -> None: + result = run_rule_with_mocks( + adhoc_binaries.get_python_for_scripts, + rule_args=[EnvironmentTarget("local", env_tgt)], + mock_gets=[ + MockGet( + output_type=_PythonBuildStandaloneBinary, + input_types=(_DownloadPythonBuildStandaloneBinaryRequest,), + mock=lambda _: pytest.fail(), + ) + ], + ) + assert result == adhoc_binaries.PythonBuildStandaloneBinary(sys.executable, EMPTY_DIGEST) + + +def test_docker_uses_helper() -> None: + result = run_rule_with_mocks( + adhoc_binaries.get_python_for_scripts, + rule_args=[EnvironmentTarget("docker", FileTarget({"source": ""}, address=Address("")))], + mock_gets=[ + MockGet( + output_type=_PythonBuildStandaloneBinary, + input_types=(_DownloadPythonBuildStandaloneBinaryRequest,), + mock=lambda _: _PythonBuildStandaloneBinary("", EMPTY_DIGEST), + ) + ], + ) + assert result == PythonBuildStandaloneBinary("", EMPTY_DIGEST) + + +def test_docker_helper(rule_runner): + rule_runner.write_files( + { + "BUILD": "local_environment(name='local')", + } + ) + 
rule_runner.set_options(["--environments-preview-names={'local': '//:local'}"]) + pbs = rule_runner.request( + _PythonBuildStandaloneBinary, + [_DownloadPythonBuildStandaloneBinaryRequest()], + ) + assert not pbs.path.startswith("/") + assert pbs._digest is not None diff --git a/src/python/pants/core/util_rules/archive.py b/src/python/pants/core/util_rules/archive.py index 141be5e8054..1e114ae2b43 100644 --- a/src/python/pants/core/util_rules/archive.py +++ b/src/python/pants/core/util_rules/archive.py @@ -10,13 +10,12 @@ from pathlib import PurePath from pants.core.util_rules import system_binaries +from pants.core.util_rules.adhoc_binaries import GunzipBinary, GunzipBinaryRequest from pants.core.util_rules.system_binaries import SEARCH_PATHS from pants.core.util_rules.system_binaries import ArchiveFormat as ArchiveFormat from pants.core.util_rules.system_binaries import ( BashBinary, BashBinaryRequest, - GunzipBinary, - GunzipBinaryRequest, TarBinary, TarBinaryRequest, UnzipBinary, @@ -35,6 +34,7 @@ ) from pants.engine.process import Process, ProcessResult from pants.engine.rules import Get, MultiGet, collect_rules, rule +from pants.util.frozendict import FrozenDict from pants.util.logging import LogLevel from pants.util.strutil import softwrap @@ -168,13 +168,14 @@ async def maybe_extract_archive(request: MaybeExtractArchiveRequest) -> Extracte return ExtractedArchive(request.digest) merge_digest_get = Get(Digest, MergeDigests((request.digest, output_dir_digest))) + env = {} + immutable_input_digests: FrozenDict[str, Digest] = FrozenDict({}) if is_zip: input_digest, unzip_binary = await MultiGet( merge_digest_get, Get(UnzipBinary, UnzipBinaryRequest()), ) argv = unzip_binary.extract_archive_argv(archive_path, extract_archive_dir) - env = {} elif is_tar: input_digest, tar_binary = await MultiGet( merge_digest_get, @@ -191,7 +192,7 @@ async def maybe_extract_archive(request: MaybeExtractArchiveRequest) -> Extracte Get(GunzipBinary, GunzipBinaryRequest()), ) argv = 
gunzip.extract_archive_argv(archive_path, extract_archive_dir) - env = {} + immutable_input_digests = gunzip.python_binary.immutable_input_digests result = await Get( ProcessResult, @@ -202,6 +203,7 @@ async def maybe_extract_archive(request: MaybeExtractArchiveRequest) -> Extracte description=f"Extract {archive_path}", level=LogLevel.DEBUG, output_directories=(extract_archive_dir,), + immutable_input_digests=immutable_input_digests, ), ) resulting_digest = await Get(Digest, RemovePrefix(result.output_digest, extract_archive_dir)) diff --git a/src/python/pants/jvm/resolve/coursier_setup.py b/src/python/pants/jvm/resolve/coursier_setup.py index e615ffda109..c56bc2570cd 100644 --- a/src/python/pants/jvm/resolve/coursier_setup.py +++ b/src/python/pants/jvm/resolve/coursier_setup.py @@ -11,17 +11,19 @@ from typing import ClassVar, Iterable, Tuple from pants.core.util_rules import external_tool +from pants.core.util_rules.adhoc_binaries import PythonBuildStandaloneBinary from pants.core.util_rules.external_tool import ( DownloadedExternalTool, ExternalToolRequest, TemplatedExternalTool, ) -from pants.core.util_rules.system_binaries import BashBinary, PythonBinary +from pants.core.util_rules.system_binaries import BashBinary from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests from pants.engine.platform import Platform from pants.engine.process import Process from pants.engine.rules import Get, MultiGet, collect_rules, rule from pants.option.option_types import StrListOption +from pants.util.frozendict import FrozenDict from pants.util.logging import LogLevel from pants.util.memo import memoized_property from pants.util.ordered_set import FrozenOrderedSet @@ -157,6 +159,7 @@ class Coursier: coursier: DownloadedExternalTool _digest: Digest repos: FrozenOrderedSet[str] + _immutable_input_digests: FrozenDict[str, Digest] bin_dir: ClassVar[str] = "__coursier" fetch_wrapper_script: ClassVar[str] = f"{bin_dir}/coursier_fetch_wrapper_script.sh" @@ -204,7 
+207,7 @@ def append_only_caches(self) -> dict[str, str]: @property def immutable_input_digests(self) -> dict[str, Digest]: - return {self.bin_dir: self._digest} + return {self.bin_dir: self._digest, **self._immutable_input_digests} @dataclass(frozen=True) @@ -241,7 +244,7 @@ async def invoke_coursier_wrapper( @rule async def setup_coursier( coursier_subsystem: CoursierSubsystem, - python: PythonBinary, + python: PythonBuildStandaloneBinary, platform: Platform, ) -> Coursier: repos_args = ( @@ -300,6 +303,7 @@ async def setup_coursier( ), ), repos=FrozenOrderedSet(coursier_subsystem.repos), + _immutable_input_digests=python.immutable_input_digests, ) diff --git a/src/python/pants/testutil/pants_integration_test.py b/src/python/pants/testutil/pants_integration_test.py index 6ac22db55dc..a13f8b51f43 100644 --- a/src/python/pants/testutil/pants_integration_test.py +++ b/src/python/pants/testutil/pants_integration_test.py @@ -91,10 +91,13 @@ def run_pants_with_workdir_without_waiting( shell: bool = False, set_pants_ignore: bool = True, ) -> PantsJoinHandle: - args = ["--no-pantsrc", f"--pants-workdir={workdir}"] + args = [ + "--no-pantsrc", + f"--pants-workdir={workdir}", + ] if set_pants_ignore: - # FIXME: For some reason, Pants's CI adds this file and it is not ignored by default. Why? - args.append("--pants-ignore=+['.coverage.*']") + # FIXME: For some reason, Pants's CI adds the coverage file and it is not ignored by default. Why? 
+ args.append("--pants-ignore=+['.coverage.*', '.python-build-standalone']") pantsd_in_command = "--no-pantsd" in command or "--pantsd" in command pantsd_in_config = config and "GLOBAL" in config and "pantsd" in config["GLOBAL"] diff --git a/src/python/pants/testutil/rule_runner.py b/src/python/pants/testutil/rule_runner.py index f686b5b8f67..b6bab97b65e 100644 --- a/src/python/pants/testutil/rule_runner.py +++ b/src/python/pants/testutil/rule_runner.py @@ -35,6 +35,7 @@ from pants.base.specs_parser import SpecsParser from pants.build_graph.build_configuration import BuildConfiguration from pants.build_graph.build_file_aliases import BuildFileAliases +from pants.core.util_rules import adhoc_binaries from pants.engine.addresses import Address from pants.engine.console import Console from pants.engine.env_vars import CompleteEnvironmentVars @@ -294,6 +295,7 @@ def rewrite_rule_for_inherent_environment(rule): all_rules = ( *self.rules, *source_root.rules(), + *adhoc_binaries.rules(), QueryRule(WrappedTarget, [WrappedTargetRequest]), QueryRule(AllTargets, []), QueryRule(UnionMembership, []),