Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability for Pants to provide Python via a union (with a pyenv impl) #18352

Merged
merged 30 commits into from
Mar 5, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ async def parse_python_dependencies(
file,
],
input_digest=input_digest,
append_only_caches=python_interpreter.append_only_caches,
description=f"Determine Python dependencies for {request.source.address}",
env=parser_script.env,
level=LogLevel.DEBUG,
Expand Down
5 changes: 4 additions & 1 deletion src/python/pants/backend/python/goals/run_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@ async def _create_python_source_run_request(
digest=merged_digest,
args=[_in_chroot(venv_pex.pex.argv0)],
extra_env=extra_env,
append_only_caches=complete_pex_environment.append_only_caches,
append_only_caches={
**complete_pex_environment.append_only_caches,
**venv_pex.append_only_caches,
},
)


Expand Down
4 changes: 4 additions & 0 deletions src/python/pants/backend/python/providers/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# Declares the Python sources in this directory as build targets
# (presumably via the Pants `python_sources` target generator; it is not defined here — confirm).
python_sources()
Empty file.
12 changes: 12 additions & 0 deletions src/python/pants/backend/python/providers/pyenv/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

# Targets for the non-test Python sources of the pyenv provider
# (presumably via the Pants `python_sources` target generator — confirm).
python_sources()
# Test targets for this directory, registered under the name "tests".
python_tests(
name="tests",
overrides={
# Per-file override: the integration test gets a timeout of 600.
# NOTE(review): presumably seconds, and presumably raised because the test
# installs Python through pyenv — confirm.
"rules_integration_test.py": {
"timeout": 600,
}
},
)
Empty file.
9 changes: 9 additions & 0 deletions src/python/pants/backend/python/providers/pyenv/register.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).


from pants.backend.python.providers.pyenv.rules import rules as pyenv_rules


def rules():
    """Return the rules registered by the pyenv provider backend as a list."""
    return list(pyenv_rules())
thejcannon marked this conversation as resolved.
Show resolved Hide resolved
182 changes: 182 additions & 0 deletions src/python/pants/backend/python/providers/pyenv/rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from textwrap import dedent
thejcannon marked this conversation as resolved.
Show resolved Hide resolved

from pants.backend.python.subsystems.setup import PythonSetup
from pants.backend.python.util_rules.pex import PythonProvider
from pants.backend.python.util_rules.pex import rules as pex_rules
from pants.backend.python.util_rules.pex_environment import PythonExecutable
from pants.core.util_rules.external_tool import (
DownloadedExternalTool,
ExternalToolRequest,
TemplatedExternalTool,
)
from pants.core.util_rules.external_tool import rules as external_tools_rules
from pants.engine.env_vars import EnvironmentVars, EnvironmentVarsRequest
from pants.engine.fs import CreateDigest, FileContent
from pants.engine.internals.native_engine import Digest, MergeDigests
from pants.engine.internals.selectors import Get, MultiGet
from pants.engine.platform import Platform
from pants.engine.process import Process, ProcessCacheScope, ProcessResult
from pants.engine.rules import collect_rules, rule
from pants.engine.unions import UnionRule
from pants.option.option_types import StrListOption
from pants.util.frozendict import FrozenDict
from pants.util.meta import classproperty


class PyenvSubsystem(TemplatedExternalTool):
    """Subsystem describing the pyenv release that is downloaded as an external tool.

    The release is fetched as a tagged source archive from GitHub (see
    `default_url_template`), which contains no platform placeholder, so the same
    archive is listed for every supported platform.
    """

    options_scope = "pyenv"
    name = "pyenv"
    help = "pyenv (https://github.com/pyenv/pyenv)."

    default_version = "2.3.13"
    default_url_template = "https://github.com/pyenv/pyenv/archive/refs/tags/v{version}.tar.gz"

    # Exposed to the install process as the `PYTHON_CONFIGURE_OPTS` environment variable.
    python_configure_opts = StrListOption(
        help="Flags to use when configuring CPython.",
        advanced=True,
    )

    @classproperty
    def default_known_versions(cls):
        """Build one `version|platform|...|...` entry per supported platform.

        All entries share the same last two fields because every platform uses the
        same archive.
        """
        # NOTE(review): presumably the archive's SHA-256 digest and its length in
        # bytes, per the external-tool "known versions" format — confirm.
        archive_fingerprint = "9105de5e5cf8dc0eca2a520ed04493d183128d46a2cfb402d4cc271af1bf144b"
        archive_length = "749323"
        supported_platforms = ("macos_arm64", "macos_x86_64", "linux_x86_64", "linux_arm64")
        return [
            "|".join([cls.default_version, platform_name, archive_fingerprint, archive_length])
            for platform_name in supported_platforms
        ]

    def generate_exe(self, plat: Platform) -> str:
        """Return the relative path of the `pyenv` executable inside the unpacked archive.

        The path embeds the configured pyenv version (`pyenv-<version>/bin/pyenv`);
        `plat` is not consulted.
        """
        return "./pyenv-{}/bin/pyenv".format(self.version)


class PythonToolchainRequest(PythonProvider):
    """`PythonProvider` subclass used to request a Python interpreter from pyenv.

    It adds no fields or behaviour of its own; it exists so this backend can be
    registered against the `PythonProvider` union.
    """


# Mapping passed as `append_only_caches` to the install process and to the returned
# `PythonExecutable`: the cache named "pyenv" is addressed by the install script at
# the relative path ".pyenv".
PYENV_APPEND_ONLY_CACHES = FrozenDict({"pyenv": ".pyenv"})
thejcannon marked this conversation as resolved.
Show resolved Hide resolved


@rule
async def get_python(
    request: PythonToolchainRequest,
    python_setup: PythonSetup,
    platform: Platform,
    pyenv_subsystem: PyenvSubsystem,
) -> PythonExecutable:
    """Provide a Python interpreter, installing it with pyenv into a named cache if needed.

    Steps:
      1. Download pyenv and read `PATH`/`LDFLAGS` from the environment.
      2. Take the version reported by `minimum_python_version` for the request's
         interpreter constraints, and ask `pyenv latest --known` for the specific
         release to install.
      3. Run a generated shell script that installs that release under the `.pyenv`
         named cache (skipped when the destination directory already exists) and
         prints the path of the interpreter.

    Returns:
        A `PythonExecutable` whose path is the last line printed by the install
        script, with no fingerprint, and carrying the pyenv named cache.

    Raises:
        ValueError: if no Python version could be determined from the request's
            interpreter constraints.
    """
    pyenv_version = pyenv_subsystem.version

    env_vars, pyenv = await MultiGet(
        Get(EnvironmentVars, EnvironmentVarsRequest(["PATH", "LDFLAGS"])),
        Get(DownloadedExternalTool, ExternalToolRequest, pyenv_subsystem.get_request(platform)),
    )

    python_to_use = request.interpreter_constraints.minimum_python_version(
        python_setup.interpreter_versions_universe
    )
    # Raise explicitly instead of asserting: assertions are stripped under `python -O`,
    # which would let `None` flow into the pyenv command line below.
    if python_to_use is None:
        raise ValueError(
            "Could not determine a minimum Python version from the interpreter constraints "
            f"`{request.interpreter_constraints}`; pyenv needs a concrete version to install."
        )

    which_python_result = await Get(
        ProcessResult,
        Process(
            [pyenv.exe, "latest", "--known", python_to_use],
            input_digest=pyenv.digest,
            description=f"Choose specific version for Python {python_to_use}",
            env={"PATH": env_vars.get("PATH", "")},
            # Caching the result is OK, since if the user really needs a different patch,
            # they should list a more precise IC. (As noted in review, this only matters
            # when new Python versions are released, which is not often.)
        ),
    )
    specific_python = which_python_result.stdout.decode("ascii").strip()

    # The script installs into a uniquely named temporary PYENV_ROOT inside the named
    # cache and then symlinks the finished installation to its final destination. It
    # always ends by printing the interpreter path, which is parsed from stdout below.
    shim_digest = await Get(
        Digest,
        CreateDigest(
            [
                FileContent(
                    "install_python_shim.sh",
                    dedent(
                        f"""\
                        #!/usr/bin/env bash
                        set -e
                        mkdir .pyenv/{pyenv_version} || true
                        DEST=$(realpath .pyenv/{pyenv_version}/{specific_python})
                        if [ ! -d "$DEST" ]; then
                            export PYENV_ROOT=$(realpath $(mktemp -d -u -p .pyenv/{pyenv_version} {specific_python}.XXXXXX))
                            # export LDFLAGS="$LDFLAGS -Wl,-rpath=PYENV_ROOT/lib"
                            {pyenv.exe} install {specific_python}
                            # Removing write perms helps ensure users aren't accidentally modifying Python
                            # or the site-packages
                            chmod -R -w "$PYENV_ROOT"/versions/{specific_python}
                            chmod +w "$PYENV_ROOT"/versions/{specific_python}
                            ln -s "$PYENV_ROOT"/versions/{specific_python} .pyenv/{pyenv_version}/{specific_python}
                            rm -rf "$PYENV_ROOT"/shims
                        fi
                        echo "$DEST"/bin/python
                        """
                    ).encode("ascii"),
                    is_executable=True,
                )
            ]
        ),
    )
    digest = await Get(Digest, MergeDigests([shim_digest, pyenv.digest]))

    # NB: This process is not cached beyond the current session (see `cache_scope` below)
    # for two reasons:
    #   1. Several tools (including pex) refer to Python at an absolute path, so a named cache
    #      is the only way for this to work reasonably well. Since the named cache could be
    #      wiped between runs (technically during a run, but we can't do anything about that)
    #      the fastest-yet-still-correct solution is to always run this process and make it
    #      bail early-and-quickly if the requisite Python already exists.
    #   2. Pyenv compiles Python using whatever compiler the system is configured to use.
    #      Python then stores this information so that it can use the same compiler when
    #      compiling extension modules. Therefore caching the compiled Python is somewhat
    #      unsafe (especially for a remote cache).
    result = await Get(
        ProcessResult,
        Process(
            ["./install_python_shim.sh"],
            input_digest=digest,
            description=f"Install Python {python_to_use}",
            append_only_caches=PYENV_APPEND_ONLY_CACHES,
            env={
                "PATH": env_vars.get("PATH", ""),
                # Deliberately a plain string, not an f-string: `{chroot}` is passed through
                # literally (presumably substituted by the engine with the sandbox path —
                # confirm).
                "TMPDIR": "{chroot}/tmpdir",
                "LDFLAGS": env_vars.get("LDFLAGS", ""),
                "PYTHON_CONFIGURE_OPTS": " ".join(pyenv_subsystem.python_configure_opts),
            },
            # Re-run every session so that, for the rest of the session, we can assume the
            # named-cache destination for this Python is valid, as the Python ecosystem
            # mainly assumes absolute paths for Python interpreters.
            cache_scope=ProcessCacheScope.PER_SESSION,
        ),
    )

    return PythonExecutable(
        # The script's final `echo` prints the interpreter path; anything printed before
        # it (e.g. install output) is ignored.
        path=result.stdout.decode("utf-8").splitlines()[-1].strip(),
        fingerprint=None,
        append_only_caches=PYENV_APPEND_ONLY_CACHES,
    )


def rules():
    """Return every rule needed by the pyenv provider, as a tuple.

    This is the rules collected from this module, the pex and external-tool rules
    it depends on, and the union registration that makes `PythonToolchainRequest`
    a member of `PythonProvider`.
    """
    registered = [*collect_rules(), *pex_rules(), *external_tools_rules()]
    registered.append(UnionRule(PythonProvider, PythonToolchainRequest))
    return tuple(registered)
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import annotations

import shutil
from textwrap import dedent
from typing import Tuple

import pytest

from pants.backend.python import target_types_rules
from pants.backend.python.dependency_inference import rules as dependency_inference_rules
from pants.backend.python.goals.run_python_source import PythonSourceFieldSet
from pants.backend.python.goals.run_python_source import rules as run_rules
from pants.backend.python.providers.pyenv.rules import rules as pyenv_rules
from pants.backend.python.target_types import PythonSourcesGeneratorTarget
from pants.build_graph.address import Address
from pants.core.goals.run import RunRequest
from pants.engine.process import InteractiveProcess
from pants.engine.rules import QueryRule
from pants.engine.target import Target
from pants.testutil.rule_runner import RuleRunner, mock_console


@pytest.fixture
def rule_runner() -> RuleRunner:
    """Build a `RuleRunner` able to produce a `RunRequest` for a Python source file.

    It is wired with the run, pyenv, dependency-inference and target-type rules,
    and knows only the `python_sources` generator target.
    """
    engine_rules = [
        *run_rules(),
        *pyenv_rules(),
        *dependency_inference_rules.rules(),
        *target_types_rules.rules(),
        QueryRule(RunRequest, (PythonSourceFieldSet,)),
    ]
    known_target_types = [
        PythonSourcesGeneratorTarget,
    ]
    return RuleRunner(rules=engine_rules, target_types=known_target_types)


def run_run_request(
    rule_runner: RuleRunner,
    target: Target,
) -> Tuple[int, str, str]:
    """Run `target` as an interactive process with the pyenv backend enabled.

    Returns a tuple of the process exit code, its stripped stdout and its
    stripped stderr, with the output captured through a mocked console.
    """
    rule_runner.set_options(
        [
            "--backend-packages=['pants.backend.python', 'pants.backend.python.providers.pyenv']",
            "--source-root-patterns=['src']",
        ],
        env_inherit={"PATH", "PYENV_ROOT", "HOME"},
    )

    request = rule_runner.request(RunRequest, [PythonSourceFieldSet.create(target)])
    interactive_process = InteractiveProcess(
        argv=request.args,
        env=request.extra_env,
        input_digest=request.digest,
        run_in_workspace=True,
        immutable_input_digests=request.immutable_input_digests,
        append_only_caches=request.append_only_caches,
    )

    with mock_console(rule_runner.options_bootstrapper) as mocked_console:
        result = rule_runner.run_interactive_process(interactive_process)
        # Read the captured output while the mocked console is still active.
        stdio_reader = mocked_console[1]
        captured_stdout = stdio_reader.get_stdout()
        captured_stderr = stdio_reader.get_stderr()

    return result.exit_code, captured_stdout.strip(), captured_stderr.strip()


@pytest.mark.parametrize("py_version", ["==2.7.*", "==3.9.*"])
def test_using_pyenv(rule_runner, py_version):
    """The interpreter that runs the app must live under the `pyenv` named cache.

    The app prints its `sysconfig` prefix, which is expected to start with
    `<named_caches_dir>/pyenv` for each parametrized interpreter constraint.
    """
    app_source = dedent(
        """\
        import os.path
        import sysconfig

        print(sysconfig.get_config_var("prefix"))
        """
    )
    build_source = f"python_sources(interpreter_constraints=['{py_version}'])"
    rule_runner.write_files({"src/app.py": app_source, "src/BUILD": build_source})

    app_target = rule_runner.get_target(Address("src", relative_file_path="app.py"))
    _exit_code, stdout, _stderr = run_run_request(rule_runner, app_target)

    global_options = rule_runner.options_bootstrapper.bootstrap_options.for_global_scope()
    expected_prefix = f"{global_options.named_caches_dir}/pyenv"
    assert stdout.startswith(expected_prefix)
thejcannon marked this conversation as resolved.
Show resolved Hide resolved


def test_venv_pex_reconstruction(rule_runner):
    """A wiped venv must be rebuilt at exactly the same location.

    A VenvPex records where its venv lives so that it need not be re-created when
    it already exists. Part of that location is a hash of the interpreter, and
    without care this hash can drift between build-time and run-time, which would
    break the assumption that the venv can be reconstructed exactly after its
    directory is deleted. The test runs the app, deletes the venv it reported,
    runs again, and expects the same location.
    """
    app_source = dedent(
        """\
        import pathlib
        import sys

        in_venv_python_path = pathlib.Path(sys.executable)
        venv_link = in_venv_python_path.parent.parent
        venv_location = venv_link.resolve()
        print(venv_location)
        """
    )
    rule_runner.write_files({"src/app.py": app_source, "src/BUILD": "python_sources()"})
    app_target = rule_runner.get_target(Address("src", relative_file_path="app.py"))

    _exit_code, first_stdout, _stderr = run_run_request(rule_runner, app_target)
    assert "pex_root/venvs/" in first_stdout

    # The app's only output is the resolved venv directory, so delete exactly that.
    shutil.rmtree(first_stdout)

    _exit_code, second_stdout, _stderr = run_run_request(rule_runner, app_target)
    assert first_stdout == second_stdout
Loading