Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch vendoring from pip install ... to Pex install. #2306

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
252 changes: 2 additions & 250 deletions pex/pep_376.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,16 @@
import json
import os
import shutil
from contextlib import closing
from fileinput import FileInput

from pex import hashing
from pex.common import is_pyc_dir, is_pyc_file, is_python_script, safe_mkdir, safe_open
from pex.compatibility import get_stdout_bytes_buffer, urlparse
from pex.dist_metadata import Distribution, EntryPoint, MetadataFiles, MetadataType
from pex.common import is_pyc_dir, is_pyc_file, safe_mkdir, safe_open
from pex.interpreter import PythonInterpreter
from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.typing import TYPE_CHECKING, cast
from pex.venv.virtualenv import Virtualenv

if TYPE_CHECKING:
from typing import (
Callable,
Container,
Dict,
Iterable,
Iterator,
Optional,
Protocol,
Text,
Tuple,
Union,
)
from typing import Callable, Iterable, Iterator, Optional, Protocol, Text, Tuple, Union

import attr # vendor:skip

Expand Down Expand Up @@ -483,240 +467,8 @@ def read(
size = int(file_size) if file_size else None
yield InstalledFile(path=path, hash=file_hash, size=size)

@staticmethod
def _find_installation(
prefix_dir, # type: str
project_name, # type: str
version, # type: str
):
# type: (...) -> Optional[MetadataFiles]

canonical_project_name = ProjectName(project_name)
canonical_version = Version(version)

# Some distributions in the wild (namely python-certifi-win32 1.6.1,
# see: https://github.com/pantsbuild/pex/issues/1861) create their own directories named
# `site-packages` that are not in-fact located in site-packages (the "purelib" or "platlib"
# sysconfig install paths). Work around these broken packages by just looking for all
# `site-packages` subdirectories of the `prefix_dir` and checking each for the installation
# `RECORD`. There should always be just one such installation `RECORD` resulting from a
# `pip install --prefix <prefix_dir> --no-deps <wheel file>` and so this is safe.
site_packages_dirs = [
os.path.join(root, d)
for root, dirs, _ in os.walk(prefix_dir)
for d in dirs
if d == "site-packages"
]
for site_packages_dir in site_packages_dirs:
metadata_files = MetadataType.DIST_INFO.load_metadata(
site_packages_dir, project_name=canonical_project_name
)
if metadata_files and canonical_version == metadata_files.metadata.version:
return metadata_files
return None

@classmethod
def from_pip_prefix_install(
cls,
prefix_dir, # type: str
project_name, # type: str
version, # type: str
):
# type: (...) -> Record
metadata_files = cls._find_installation(prefix_dir, project_name, version)
if not metadata_files:
raise RecordNotFoundError(
"Could not find project metadata for {project_name} {version} under "
"{prefix_dir}".format(
project_name=project_name, version=version, prefix_dir=prefix_dir
)
)
record_relpath = metadata_files.metadata_file_rel_path("RECORD")
if not record_relpath:
raise RecordNotFoundError(
"Could not find the installation RECORD for {project_name} {version} under "
"{location}".format(
project_name=project_name,
version=version,
location=metadata_files.metadata.location,
)
)

rel_base_dir = os.path.relpath(metadata_files.metadata.location, prefix_dir)
return cls(
project_name=project_name,
version=version,
prefix_dir=prefix_dir,
rel_base_dir=rel_base_dir,
relative_path=record_relpath,
)

project_name = attr.ib() # type: str
version = attr.ib() # type: str
prefix_dir = attr.ib() # type: str
rel_base_dir = attr.ib() # type: Text
relative_path = attr.ib() # type: Text

def _find_dist_info_file(self, filename):
# type: (str) -> Optional[DistInfoFile]
metadata_files = MetadataType.DIST_INFO.load_metadata(
location=os.path.join(self.prefix_dir, self.rel_base_dir),
project_name=ProjectName(self.project_name),
)
if metadata_files is None:
return None

metadata_file_rel_path = metadata_files.metadata_file_rel_path(filename)
if metadata_file_rel_path is None:
return None

content = metadata_files.read(filename)
if content is None:
return None

file_path = os.path.join(metadata_files.metadata.location, metadata_file_rel_path)
return DistInfoFile(path=file_path, content=content)

def fixup_install(
self,
exclude=(), # type: Container[str]
interpreter=None, # type: Optional[PythonInterpreter]
):
# type: (...) -> InstalledWheel
"""Fixes a wheel install to be reproducible and importable.

After fixed up, this RECORD can be used to re-install the wheel in a venv with `reinstall`.

:param exclude: Any top-level items to exclude.
:param interpreter: The interpreter used to perform the wheel install.
"""
self._fixup_scripts()
self._fixup_direct_url()

# The RECORD is unused in PEX zipapp mode and only needed in venv mode. Since it can contain
# relative path entries that differ between interpreters - notably pypy for Python < 3.8 has
# a custom scheme - we just delete the file and create it on-demand for venv re-installs.
os.unlink(os.path.join(self.prefix_dir, self.rel_base_dir, self.relative_path))

# An example of the installed wheel chroot we're aiming for:
# .prefix/bin/... # scripts
# .prefix/include/site/pythonX.Y/... # headers
# .prefix/share/... # data files
# greenlet/... # importables
# greenlet-1.1.2.dist-info/... # importables
stash_dir = ".prefix"
prefix_stash = os.path.join(self.prefix_dir, stash_dir)
safe_mkdir(prefix_stash)

# 1. Move everything into the stash.
for item in os.listdir(self.prefix_dir):
if stash_dir == item or item in exclude:
continue
shutil.move(os.path.join(self.prefix_dir, item), os.path.join(prefix_stash, item))
# 2. Normalize all `*/{python ver}` paths to `*/pythonX.Y`
for root, dirs, _ in os.walk(prefix_stash):
dirs_to_scan = []
for d in dirs:
path = os.path.join(root, d)
normalized_path = InstalledFile.normalized_path(path, interpreter=interpreter)
if normalized_path != path:
shutil.move(path, normalized_path)
else:
dirs_to_scan.append(d)
dirs[:] = dirs_to_scan

# 3. Move `site-packages` content back up to the prefix dir chroot so that content is
# importable when this prefix dir chroot is added to the `sys.path` in PEX zipapp mode.
importable_stash = InstalledFile.normalized_path(
os.path.join(prefix_stash, self.rel_base_dir), interpreter=interpreter
)
for importable_item in os.listdir(importable_stash):
shutil.move(
os.path.join(importable_stash, importable_item),
os.path.join(self.prefix_dir, importable_item),
)
os.rmdir(importable_stash)

return InstalledWheel.save(
prefix_dir=self.prefix_dir,
stash_dir=stash_dir,
record_relpath=self.relative_path,
)

def _fixup_scripts(self):
# type: (...) -> None
bin_dir = os.path.join(self.prefix_dir, "bin")
if not os.path.isdir(bin_dir):
return

console_scripts = {} # type: Dict[Text, EntryPoint]
entry_points_file = self._find_dist_info_file("entry_points.txt")
if entry_points_file:
console_scripts.update(
Distribution.parse_entry_map(entry_points_file.content).get("console_scripts", {})
)

scripts = {} # type: Dict[str, Optional[bytes]]
for script_name in os.listdir(bin_dir):
script_path = os.path.join(bin_dir, script_name)
if is_python_script(script_path):
scripts[script_path] = None
elif script_name in console_scripts:
# When a wheel is installed by Pip and that wheel contains console_scripts, they are
# normally written with a faux-shebang of:
# #!python
#
# Pex relies on this hermetic shebang and only ever reifies it when creating venvs.
#
# If Pip is being run under a Python executable with a path length >127 characters
# on Linux though, it writes a shebang / header of:
# #!/bin/sh
# '''exec' <too long path to Pip venv python> "$0" "$@"'
# ' '''
#
# That header is immediately followed by the expected console_script shim contents:
# # -*- coding: utf-8 -*-
# import re
# import sys
# from <ep_module> import <ep_func>
# if __name__ == '__main__':
# sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
# sys.exit(main())
#
# Instead of guessing that 127 characters is the shebang length limit and using
# Pip's safety-hatch `/bin/sh` trick, we forcibly re-write the header to be just the
# expected `#!python` shebang. We detect the end of the header with the known 1st
# line of console_script shim ~code defined in
# pex/vendor/_vendored/pip/pip/_vendor/distlib/scripts.py on line 41:
# https://github.com/pantsbuild/pex/blob/196b4cd5b8dd4b4af2586460530e9a777262be7d/pex/vendor/_vendored/pip/pip/_vendor/distlib/scripts.py#L41
scripts[script_path] = b"# -*- coding: utf-8 -*-"
if not scripts:
return

with closing(FileInput(files=scripts.keys(), inplace=True, mode="rb")) as script_fi:
first_non_shebang_line = None # type: Optional[bytes]
for line in script_fi:
buffer = get_stdout_bytes_buffer()
if script_fi.isfirstline():
first_non_shebang_line = scripts[script_fi.filename()]
# Ensure python shebangs are reproducible. The only place these can be used is
# in venv mode PEXes where the `#!python` placeholder shebang will be re-written
# to use the venv's python interpreter.
buffer.write(b"#!python\n")
elif (
not first_non_shebang_line
or cast(bytes, line).strip() == first_non_shebang_line
):
# N.B.: These lines include the newline already.
buffer.write(cast(bytes, line))
first_non_shebang_line = None

def _fixup_direct_url(self):
# type: () -> None
direct_url_file = self._find_dist_info_file("direct_url.json")
if direct_url_file:
if (
urlparse.urlparse(json.loads(direct_url_file.content.decode("utf-8"))["url"]).scheme
== "file"
):
os.unlink(direct_url_file.path)
19 changes: 11 additions & 8 deletions pex/pep_427.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,13 +394,16 @@ def record_files(
print("pex", file=fp)
installed_files.append(InstalledWheel.create_installed_file(path=fp.name, dest_dir=dest))

if requested:
requested_path = os.path.join(dest, wheel.metadata_path("REQUESTED"))
touch(requested_path)
installed_files.append(
InstalledWheel.create_installed_file(path=requested_path, dest_dir=dest)
)
if interpreter:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the change from unconditional to conditional for these files?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

REQUESTED is not required at all; see: https://peps.python.org/pep-0376/#one-dist-info-directory-per-installed-distribution.

RECORD is required to support uninstalling, as noted, so we still write it when an interpreter is supplied (i.e., when installing into a venv) for interoperability with external tools like pip uninstall. We skip it for installed wheel chroots: those are private and are never uninstalled by anyone but Pex, and, since they are chroots, uninstalling one is just a recursive removal of the chroot directory.

So, basically, this just saves a small amount of work for the most common Pex wheel install case.

# Finalize a proper venv install with REQUESTED and a RECORD to support un-installing.
if requested:
requested_path = os.path.join(dest, wheel.metadata_path("REQUESTED"))
touch(requested_path)
installed_files.append(
InstalledWheel.create_installed_file(path=requested_path, dest_dir=dest)
)

installed_files.append(InstalledFile(path=record_relpath, hash=None, size=None))
Record.write(dst=record_abspath, installed_files=installed_files)

installed_files.append(InstalledFile(path=record_relpath, hash=None, size=None))
Record.write(dst=record_abspath, installed_files=installed_files)
return wheel.metadata_files
2 changes: 1 addition & 1 deletion pex/vendor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def git(
rewrite=True, # type: bool
constraints=(), # type: Tuple[str, ...]
):
requirement = "git+{repo}@{commit}#egg={project_name}".format(
requirement = "{project_name} @ git+{repo}@{commit}".format(
repo=repo, commit=commit, project_name=project_name
)
if not prep_command:
Expand Down
Loading