Skip to content

Commit

Permalink
Fix site-packages identification. (pex-tool#2262)
Browse files Browse the repository at this point in the history
Fix site-packages identification.

In the process of working a new feature, it was discovered that
`sys.path` manipulations leaked into `PythonInterpreter` when they were
performed before the `__pex__` import hook. This is fixed by always
going through the interpreter disk cache path, which has properly
isolated `sys.path` for quite some time, eliminating the one exception
for the current interpreter. Fixing this case caused several tests to
fail that exposed several eager uses of `PythonInterpreter.get()` that
would execute in import of various modules. Those cases are fixed to
either be lazy or use `sys.executable`.

Along the way, fix up the ~TODO for migrating from `distutils.sysconfig`
to `sysconfig` for determining the site-packages directories. This
should preempt issues with vendor (e.g.: Debian) supplied Python 3.12+.
  • Loading branch information
jsirois authored Oct 9, 2023
1 parent f9a9d94 commit 1f20e4a
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 73 deletions.
38 changes: 27 additions & 11 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,32 @@ def _iter_site_packages():
# include a getsitepackages function.
TRACER.log("The site module does not define getsitepackages: {err}".format(err=e))

# The distutils package was deprecated in 3.10 and removed in 3.12. The sysconfig module was
# introduced in 3.2 but is not usable for our purposes until 3.11. We need
# `get_default_scheme` to get the current interpreter's installation scheme, which was made
# public in 3.10, but not made correct for venv interpreters until 3.11.
try:
import sysconfig

get_default_scheme = getattr(sysconfig, "get_default_scheme", None)
if get_default_scheme and sys.version_info[:2] >= (3, 11):
scheme = get_default_scheme()
yield sysconfig.get_path("purelib", scheme)
yield sysconfig.get_path("platlib", scheme)
return
except ImportError:
pass

# The distutils.sysconfig module is deprecated in Python 3.10 but still around. It goes away
# in 3.12 with viable replacements in sysconfig starting in Python 3.11. See above where we
# use those replacements preferentially, when available.
try:
from distutils.sysconfig import get_python_lib

yield get_python_lib(plat_specific=False)
yield get_python_lib(plat_specific=True)
except ImportError as e:
# The distutils.sysconfig module is deprecated in Python 3.10 but still around.
# Eventually it will go away with replacements in sysconfig that we'll add at that time
# as another site packages source.
TRACER.log(
"The distutils.sysconfig module does not define get_python_lib: {err}".format(err=e)
)
except ImportError:
pass

@staticmethod
def _iter_extras_paths(site_packages):
Expand Down Expand Up @@ -304,6 +318,11 @@ def encode(self):
def binary(self):
return self._binary

@property
def is_venv(self):
# type: () -> bool
return self._prefix != self._base_prefix

@property
def prefix(self):
# type: () -> str
Expand Down Expand Up @@ -979,9 +998,6 @@ def _spawn_from_binary(cls, binary):
cached_interpreter = cls._PYTHON_INTERPRETER_BY_NORMALIZED_PATH.get(canonicalized_binary)
if cached_interpreter is not None:
return SpawnedJob.completed(cached_interpreter)
if canonicalized_binary == cls.canonicalize_path(sys.executable):
current_interpreter = cls(PythonIdentity.get())
return SpawnedJob.completed(current_interpreter)
return cls._spawn_from_binary_external(canonicalized_binary)

@classmethod
Expand Down Expand Up @@ -1149,7 +1165,7 @@ def binary(self):
def is_venv(self):
# type: () -> bool
"""Return `True` if this interpreter is homed in a virtual environment."""
return self._identity.prefix != self._identity.base_prefix
return self._identity.is_venv

@property
def prefix(self):
Expand Down
27 changes: 15 additions & 12 deletions pex/pex.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright 2014 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, print_function
from __future__ import absolute_import

import ast
import itertools
import os
import sys
from site import USER_SITE
Expand All @@ -17,7 +18,7 @@
from pex.executor import Executor
from pex.finders import get_entry_point_from_console_script, get_script_from_distributions
from pex.inherit_path import InheritPath
from pex.interpreter import PythonInterpreter
from pex.interpreter import PythonIdentity, PythonInterpreter
from pex.layout import Layout
from pex.orderedset import OrderedSet
from pex.pex_info import PexInfo
Expand Down Expand Up @@ -50,42 +51,44 @@
class IsolatedSysPath(object):
@staticmethod
def _expand_paths(*paths):
# type: (*str) -> Iterator[str]
for path in paths:
# type: (*str) -> OrderedSet[str]
def iter_synonyms(path):
yield path
if not os.path.isabs(path):
yield os.path.abspath(path)
yield os.path.abspath(path)
yield os.path.realpath(path)

return OrderedSet(itertools.chain.from_iterable(iter_synonyms(path) for path in paths))

@classmethod
def for_pex(
cls,
interpreter, # type: PythonInterpreter
interpreter, # type: Union[PythonInterpreter, PythonIdentity]
pex, # type: str
pex_pex=None, # type: Optional[str]
):
# type: (...) -> IsolatedSysPath
sys_path = OrderedSet(interpreter.sys_path)
ident = interpreter.identity if isinstance(interpreter, PythonInterpreter) else interpreter
sys_path = OrderedSet(ident.sys_path)
sys_path.add(pex)
sys_path.add(Bootstrap.locate().path)
if pex_pex:
sys_path.add(pex_pex)

site_packages = OrderedSet() # type: OrderedSet[str]
for site_lib in interpreter.site_packages:
for site_lib in ident.site_packages:
TRACER.log("Discarding site packages path: {site_lib}".format(site_lib=site_lib))
site_packages.add(site_lib)

extras_paths = OrderedSet() # type: OrderedSet[str]
for extras_path in interpreter.extras_paths:
for extras_path in ident.extras_paths:
TRACER.log("Discarding site extras path: {extras_path}".format(extras_path=extras_path))
extras_paths.add(extras_path)

return cls(
sys_path=sys_path,
site_packages=site_packages,
extras_paths=extras_paths,
is_venv=interpreter.is_venv,
is_venv=ident.is_venv,
)

def __init__(
Expand Down Expand Up @@ -339,7 +342,7 @@ def all_distribution_paths(path):
# type: (Optional[str]) -> Iterable[str]
if path is None:
return ()
locations = set(dist.location for dist in find_distributions(path))
locations = set(dist.location for dist in find_distributions([path]))
return {path} | locations | set(os.path.realpath(path) for path in locations)

for path_element in sys.path:
Expand Down
15 changes: 9 additions & 6 deletions pex/pip/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from __future__ import absolute_import

import os
import sys

from pex import targets
from pex.dist_metadata import Requirement
from pex.enum import Enum
from pex.pep_440 import Version
Expand All @@ -14,7 +14,7 @@
from pex.typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
from typing import Iterable, Optional, Tuple
from typing import Iterable, Optional, Tuple, Union


class PipVersionValue(Enum.Value):
Expand Down Expand Up @@ -72,10 +72,13 @@ def requirements(self):
return self.requirement, self.setuptools_requirement, self.wheel_requirement

def requires_python_applies(self, target=None):
# type: (Optional[Target]) -> bool
# type: (Optional[Union[Version,Target]]) -> bool
if not self.requires_python:
return True

if isinstance(target, Version):
return str(target) in self.requires_python

return LocalInterpreter.create(
interpreter=target.get_interpreter() if target else None
).requires_python_applies(
Expand All @@ -102,20 +105,20 @@ def __init__(self, preferred):
def __get__(self, obj, objtype=None):
if not hasattr(self, "_default"):
self._default = None
current_target = targets.current()
current_version = Version(".".join(map(str, sys.version_info[:3])))
preferred_versions = (
[PipVersionValue.overridden()] if PipVersionValue.overridden() else self._preferred
)
for preferred_version in preferred_versions:
if preferred_version.requires_python_applies(current_target):
if preferred_version.requires_python_applies(current_version):
self._default = preferred_version
break
if self._default is None:
self._default = max(
(
version
for version in PipVersionValue._iter_values()
if not version.hidden and version.requires_python_applies(current_target)
if not version.hidden and version.requires_python_applies(current_version)
),
key=lambda pv: pv.version,
)
Expand Down
4 changes: 2 additions & 2 deletions pex/resolve/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ def get_downloads_dir(pex_root=None):
@attr.s(frozen=True)
class ArtifactDownloader(object):
resolver = attr.ib() # type: Resolver
target = attr.ib(default=LocalInterpreter.create()) # type: Target
target = attr.ib(factory=LocalInterpreter.create) # type: Target
package_index_configuration = attr.ib(
default=PackageIndexConfiguration.create()
factory=PackageIndexConfiguration.create
) # type: PackageIndexConfiguration
max_parallel_jobs = attr.ib(default=None) # type: Optional[int]
pip = attr.ib(init=False) # type: Pip
Expand Down
33 changes: 12 additions & 21 deletions pex/resolve/target_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,12 @@ def register(
),
)

current_interpreter = PythonInterpreter.get()
program = sys.argv[0]
singe_interpreter_info_cmd = (
"PEX_TOOLS=1 {current_interpreter} {program} interpreter --verbose --indent 4".format(
current_interpreter=current_interpreter.binary, program=program
single_interpreter_info_cmd = (
"pex3 interpreter inspect --python {current_interpreter} --verbose --indent 4".format(
current_interpreter=sys.executable
)
)
all_interpreters_info_cmd = (
"PEX_TOOLS=1 {program} interpreter --all --verbose --indent 4".format(program=program)
)
all_interpreters_info_cmd = "pex3 interpreter inspect --all --verbose --indent 4"

parser.add_argument(
"--interpreter-constraint",
Expand All @@ -86,22 +82,19 @@ def register(
"the constraints. Try `{singe_interpreter_info_cmd}` to find the exact interpreter "
"constraints of {current_interpreter} and `{all_interpreters_info_cmd}` to find out "
"the interpreter constraints of all Python interpreters on the $PATH.".format(
current_interpreter=current_interpreter.binary,
singe_interpreter_info_cmd=singe_interpreter_info_cmd,
current_interpreter=sys.executable,
singe_interpreter_info_cmd=single_interpreter_info_cmd,
all_interpreters_info_cmd=all_interpreters_info_cmd,
)
),
)

if include_platforms:
_register_platform_options(
parser, current_interpreter, singe_interpreter_info_cmd, all_interpreters_info_cmd
)
_register_platform_options(parser, single_interpreter_info_cmd, all_interpreters_info_cmd)


def _register_platform_options(
parser, # type: _ActionsContainer
current_interpreter, # type: PythonInterpreter
singe_interpreter_info_cmd, # type: str
all_interpreters_info_cmd, # type: str
):
Expand All @@ -120,13 +113,11 @@ def _register_platform_options(
"string composed of fields: <platform>-<python impl abbr>-<python version>-<abi>. "
"These fields stem from wheel name conventions as outlined in "
"https://www.python.org/dev/peps/pep-0427#file-name-convention and influenced by "
"https://www.python.org/dev/peps/pep-0425. For the current interpreter at "
"{current_interpreter} the full platform string is {current_platform}. To find out "
"more, try `{all_interpreters_info_cmd}` to print out the platform for all "
"interpreters on the $PATH or `{singe_interpreter_info_cmd}` to inspect the single "
"interpreter {current_interpreter}.".format(
current_interpreter=current_interpreter.binary,
current_platform=current_interpreter.platform,
"https://www.python.org/dev/peps/pep-0425. To find out more, try "
"`{all_interpreters_info_cmd}` to print out the platform for all interpreters on the "
"$PATH or `{singe_interpreter_info_cmd}` to inspect the single interpreter "
"{current_interpreter}.".format(
current_interpreter=sys.executable,
singe_interpreter_info_cmd=singe_interpreter_info_cmd,
all_interpreters_info_cmd=all_interpreters_info_cmd,
)
Expand Down
2 changes: 1 addition & 1 deletion pex/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1154,7 +1154,7 @@ def _download_internal(
class LocalDistribution(object):
path = attr.ib() # type: str
fingerprint = attr.ib() # type: str
target = attr.ib(default=targets.current()) # type: Target
target = attr.ib(factory=targets.current) # type: Target

@fingerprint.default
def _calculate_fingerprint(self):
Expand Down
13 changes: 10 additions & 3 deletions pex/vendor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from redbaron import CommentNode, LiteralyEvaluable, NameNode, RedBaron

from pex.common import safe_delete, safe_mkdir, safe_mkdtemp, safe_open, safe_rmtree
from pex.interpreter import PythonInterpreter
from pex.vendor import VendorSpec, iter_vendor_specs


Expand Down Expand Up @@ -206,11 +207,12 @@ def find_site_packages(prefix_dir):
)


def vendorize(root_dir, vendor_specs, prefix, update):
def vendorize(interpreter, root_dir, vendor_specs, prefix, update):
# There is bootstrapping catch-22 here. In order for `pex.third_party` to work, all 3rdparty
# importable code must lie at the top of its vendored chroot. Although
# `pex.pep_376.Record.fixup_install` encodes the logic to achieve this layout, we can't run
# that without 1st approximating that layout!. We take the tack of doing the --prefix
# that without the current `pex.interpreter.PythonInterpreter` and 1st approximating that
# layout!. We take the tack of saving the current interpreter and then doing the `--prefix`
# install off into a temp dir, moving the site-packages importables into the vendor chroot,
# importing the code we'll need, then moving the importables back.
moved = {}
Expand Down Expand Up @@ -361,7 +363,9 @@ def vendorize(root_dir, vendor_specs, prefix, update):
project_name=dist.project_name,
version=dist.version,
)
record.fixup_install(exclude=("constraints.txt", "__init__.py", "__pycache__"))
record.fixup_install(
exclude=("constraints.txt", "__init__.py", "__pycache__"), interpreter=interpreter
)


if __name__ == "__main__":
Expand All @@ -375,10 +379,13 @@ def vendorize(root_dir, vendor_specs, prefix, update):
)
options = parser.parse_args()

# Grab the PythonInterpreter before we nuke the supporting code needed to identify it.
interpreter = PythonInterpreter.get()
root_directory = VendorSpec.ROOT
try:
safe_rmtree(VendorSpec.vendor_root())
vendorize(
interpreter=interpreter,
root_dir=root_directory,
vendor_specs=list(iter_vendor_specs()),
prefix="pex.third_party",
Expand Down
9 changes: 3 additions & 6 deletions tests/integration/test_issue_1232.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,10 @@ def vendored_toplevel(isolated_dir):
current_pex_isolation = set(current_isolated_vendoreds.keys()) ^ set(
current_pex_isolated_vendoreds.keys()
)
assert 1 == len(current_pex_isolation), (
"Since the modified Pex PEX was built from a Pex PEX an isolation of the Pex PEX bootstrap "
"code should have occurred bringing the total isolations up to two."
assert 0 == len(current_pex_isolation), (
"Since the current Pex PEX was built from the same Pex code as the current loose source "
"Pex, a new isolation of the Pex PEX bootstrap code should not have occurred."
)
current_pex_vendoreds = current_pex_isolated_vendoreds[current_pex_isolation.pop()]
assert "pip" not in current_pex_vendoreds, "Expected a Pex runtime isolation."
assert "wheel" not in current_pex_vendoreds, "Expected a Pex runtime isolation."

# 3. Isolate modified Pex PEX at build-time.
# ===
Expand Down
30 changes: 30 additions & 0 deletions tests/test_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,33 @@ def test_sys_path(python):
'Its expected the sys_path matches the runtime sys.path with the exception of the PWD ("") '
"head entry."
)


def test_sys_path_leak_for_current(tmpdir):
# type: (Any) -> None

expected_sys_path = [
entry
for entry in json.loads(
subprocess.check_output(
args=[
sys.executable,
"-sE",
"-c",
"import json, sys; json.dump(sys.path, sys.stdout)",
]
).decode("utf-8")
)
# N.B.: We expect the PythonInterpreter infrastructure to elide the implicit "" / CWD
# sys.path entry.
if entry
]
pex_root = os.path.join(str(tmpdir), "pex_root")

new_sys_path_entry = os.path.join(str(tmpdir), "not-interpreter-sys-path")
with ENV.patch(PEX_ROOT=pex_root), PythonInterpreter._cleared_memory_cache():
sys.path.append(new_sys_path_entry)
try:
assert tuple(expected_sys_path) == PythonInterpreter.get().sys_path
finally:
sys.path.pop()
Loading

0 comments on commit 1f20e4a

Please sign in to comment.