Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce --pre-resolved-dists resolver. #2512

Merged
merged 7 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Release Notes

## 2.19.0

This release adds support for a new `--pre-resolved-dists` resolver as
an alternative to the existing Pip, `--lock` and `--pex-repository`
resolvers. Using `--pre-resolved-dists dists/dir/` behaves much like
`--no-pypi --find-links dists/dir/` except that it is roughly 3x faster.

* Support `--pre-resolved-dists` resolver. (#2512)

## 2.18.1

This release fixes `--scie-name-style platform-parent-dir` introduced in
Expand Down
34 changes: 19 additions & 15 deletions pex/bin/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
from pex.resolve.resolver_configuration import (
LockRepositoryConfiguration,
PexRepositoryConfiguration,
PreResolvedConfiguration,
)
from pex.resolve.resolver_options import create_pip_configuration
from pex.resolve.resolvers import Unsatisfiable, sorted_requirements
Expand Down Expand Up @@ -136,7 +137,9 @@ def configure_clp_pex_resolution(parser):
),
)

resolver_options.register(group, include_pex_repository=True, include_lock=True)
resolver_options.register(
group, include_pex_repository=True, include_lock=True, include_pre_resolved=True
)

group.add_argument(
"--pex-path",
Expand Down Expand Up @@ -1011,25 +1014,26 @@ def build_pex(
DependencyConfiguration.from_pex_info(requirements_pex_info)
)

if isinstance(resolver_configuration, (LockRepositoryConfiguration, PreResolvedConfiguration)):
pip_configuration = resolver_configuration.pip_configuration
elif isinstance(resolver_configuration, PexRepositoryConfiguration):
# TODO(John Sirois): Consider finding a way to support custom --index and --find-links in
# this case. I.E.: I use a corporate index to build a PEX repository and now I want to
# build a --project PEX whose pyproject.toml build-system.requires should be resolved from
# that corporate index.
pip_configuration = try_(
finalize_resolve_config(
create_pip_configuration(options), targets=targets, context="--project building"
)
)
else:
pip_configuration = resolver_configuration

project_dependencies = OrderedSet() # type: OrderedSet[Requirement]
with TRACER.timed(
"Adding distributions built from local projects and collecting their requirements: "
"{projects}".format(projects=" ".join(options.projects))
):
if isinstance(resolver_configuration, LockRepositoryConfiguration):
pip_configuration = resolver_configuration.pip_configuration
elif isinstance(resolver_configuration, PexRepositoryConfiguration):
# TODO(John Sirois): Consider finding a way to support custom --index and --find-links in this case.
# I.E.: I use a corporate index to build a PEX repository and now I want to build a --project PEX
# whose pyproject.toml build-system.requires should be resolved from that corporate index.
pip_configuration = try_(
finalize_resolve_config(
create_pip_configuration(options), targets=targets, context="--project building"
)
)
else:
pip_configuration = resolver_configuration

projects = project.get_projects(options)
built_projects = projects.build(
targets=targets,
Expand Down
1 change: 1 addition & 0 deletions pex/cli/commands/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ def _add_resolve_options(cls, parser):
cls._create_resolver_options_group(parser),
include_pex_repository=False,
include_lock=False,
include_pre_resolved=False,
)

@classmethod
Expand Down
4 changes: 3 additions & 1 deletion pex/cli/commands/venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ def _add_create_arguments(cls, parser):
)
installer_options.register(parser)
target_options.register(parser, include_platforms=True)
resolver_options.register(parser, include_pex_repository=True, include_lock=True)
resolver_options.register(
parser, include_pex_repository=True, include_lock=True, include_pre_resolved=True
)
requirement_options.register(parser)

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,11 @@ def can_write_dir(path):


def touch(file):
# type: (Text) -> None
# type: (_Text) -> _Text
"""Equivalent of unix `touch path`."""
with safe_open(file, "a"):
os.utime(file, None)
return file


class Chroot(object):
Expand Down
83 changes: 69 additions & 14 deletions pex/dist_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,65 @@ class InvalidMetadataError(MetadataError):
"""Indicates a metadata value that is invalid."""


def is_tar_sdist(path):
    # type: (Text) -> bool
    """Return whether `path` ends with a recognized tar sdist extension, ignoring case."""
    # N.B.: Although PEP-625 (https://peps.python.org/pep-0625/) mandates .tar.gz for sdists,
    # there is a known tar.bz2 example in the wild: python-constraint 1.4.0 on PyPI:
    #   https://pypi.org/project/python-constraint/1.4.0/#files
    # This probably all stems from the legacy `python setup.py sdist` as last described here:
    #   https://docs.python.org/3.11/distutils/sourcedist.html
    # There was a move to reject exotic formats in PEP-527 in 2016 and the historical sdist
    # formats appear to be listed here: https://peps.python.org/pep-0527/#file-extensions
    # A query on the PyPI dataset shows:
    #
    # SELECT
    #   REGEXP_EXTRACT(path, r'\.([^.]+|tar\.[^.]+|tar)$') as extension,
    #   count(*) as count
    # FROM `bigquery-public-data.pypi.distribution_metadata`
    # group by extension
    # order by count desc
    #
    #   | extension | count   |
    #   |-----------|---------|
    #   | whl       | 6332494 |
    # * | tar.gz    | 5283102 |
    #   | egg       |  135940 |
    # * | zip       |  108532 |
    #   | exe       |   18452 |
    # * | tar.bz2   |    3857 |
    #   | msi       |     625 |
    #   | rpm       |     603 |
    # * | tgz       |     226 |
    #   | dmg       |      47 |
    #   | deb       |      36 |
    # * | tar.zip   |       2 |
    # * | ZIP       |       1 |
    tar_sdist_suffixes = (".tar.gz", ".tgz", ".tar.bz2")
    normalized_path = path.lower()
    return normalized_path.endswith(tar_sdist_suffixes)


def is_zip_sdist(path):
    # type: (Text) -> bool
    """Return whether `path` ends with the `.zip` sdist extension, ignoring case."""
    normalized_path = path.lower()
    return normalized_path.endswith(".zip")


def is_sdist(path):
    # type: (Text) -> bool
    """Return whether `path` is accepted by either `is_tar_sdist` or `is_zip_sdist`."""
    if is_tar_sdist(path):
        return True
    return is_zip_sdist(path)


def is_wheel(path):
    # type: (Text) -> bool
    """Return whether `path` ends with the `.whl` extension, ignoring case."""
    normalized_path = path.lower()
    return normalized_path.endswith(".whl")


def _strip_sdist_path(sdist_path):
# type: (Text) -> Optional[Text]
if not sdist_path.endswith((".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar.xz", ".txz", ".zip")):
if not is_sdist(sdist_path):
return None

sdist_basename = os.path.basename(sdist_path)
filename, _ = os.path.splitext(sdist_basename)
if filename.endswith(".tar"):
if filename.lower().endswith(".tar"):
filename, _ = os.path.splitext(filename)
return filename

Expand Down Expand Up @@ -194,8 +245,19 @@ def read_function(rel_path):
)


def _read_from_zip(
    zip_location,  # type: str
    rel_path,  # type: Text
):
    # type: (...) -> bytes
    """Open the zip at `zip_location` and return the bytes of its `rel_path` entry."""
    with open_zip(zip_location) as zip_file:
        entry_bytes = zip_file.read(rel_path)
    return entry_bytes


def find_wheel_metadata(location):
# type: (Text) -> Optional[MetadataFiles]

read_function = functools.partial(_read_from_zip, location)
with open_zip(location) as zf:
for name in zf.namelist():
if name.endswith("/"):
Expand All @@ -218,11 +280,6 @@ def find_wheel_metadata(location):
if dist_info_dir == head and tail != metadata_file_name:
files.append(rel_path)

def read_function(rel_path):
# type: (Text) -> bytes
with open_zip(location) as zf:
return zf.read(rel_path)

return MetadataFiles(
metadata=DistMetadataFile(
type=MetadataType.DIST_INFO,
Expand Down Expand Up @@ -330,7 +387,7 @@ def iter_metadata_files(
location, MetadataType.DIST_INFO, "*.dist-info", "METADATA"
)
)
elif location.endswith(".whl") and zipfile.is_zipfile(location):
elif is_wheel(location) and zipfile.is_zipfile(location):
metadata_files = find_wheel_metadata(location)
if metadata_files:
listing.append(metadata_files)
Expand All @@ -341,13 +398,11 @@ def iter_metadata_files(
)
)
elif MetadataType.PKG_INFO is metadata_type:
if location.endswith(".zip") and zipfile.is_zipfile(location):
if is_zip_sdist(location) and zipfile.is_zipfile(location):
metadata_file = find_zip_sdist_metadata(location)
if metadata_file:
listing.append(MetadataFiles(metadata=metadata_file))
elif location.endswith(
(".tar.gz", ".tgz", ".tar.bz2", ".tbz2", ".tar.xz", ".txz")
) and tarfile.is_tarfile(location):
elif is_tar_sdist(location) and tarfile.is_tarfile(location):
metadata_file = find_tar_sdist_metadata(location)
if metadata_file:
listing.append(MetadataFiles(metadata=metadata_file))
Expand Down Expand Up @@ -408,7 +463,7 @@ def from_filename(cls, path):
#
# The wheel filename convention is specified here:
# https://www.python.org/dev/peps/pep-0427/#file-name-convention.
if path.endswith(".whl"):
if is_wheel(path):
project_name, version, _ = os.path.basename(path).split("-", 2)
return cls(project_name=project_name, version=version)

Expand Down Expand Up @@ -903,7 +958,7 @@ def of(cls, location):
# type: (Text) -> DistributionType.Value
if os.path.isdir(location):
return cls.INSTALLED
if location.endswith(".whl") and zipfile.is_zipfile(location):
if is_wheel(location) and zipfile.is_zipfile(location):
return cls.WHEEL
return cls.SDIST

Expand Down
31 changes: 13 additions & 18 deletions pex/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,18 @@
from pex import dist_metadata, pex_warnings, targets
from pex.common import pluralize
from pex.dependency_configuration import DependencyConfiguration
from pex.dist_metadata import Distribution, Requirement
from pex.dist_metadata import Distribution, Requirement, is_wheel
from pex.fingerprinted_distribution import FingerprintedDistribution
from pex.inherit_path import InheritPath
from pex.interpreter import PythonInterpreter
from pex.layout import ensure_installed, identify_layout
from pex.orderedset import OrderedSet
from pex.pep_425 import CompatibilityTags, TagRank
from pex.pep_425 import TagRank
from pex.pep_503 import ProjectName
from pex.pex_info import PexInfo
from pex.targets import Target
from pex.third_party.packaging import specifiers
from pex.third_party.packaging.tags import Tag
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING

Expand Down Expand Up @@ -139,7 +140,7 @@ def render_message(self, _target):

@attr.s(frozen=True)
class _TagMismatch(_UnrankedDistribution):
wheel_tags = attr.ib() # type: CompatibilityTags
wheel_tags = attr.ib() # type: Iterable[Tag]

def render_message(self, target):
# type: (Target) -> str
Expand Down Expand Up @@ -332,32 +333,26 @@ def _update_candidate_distributions(self, distribution_iter):

def _can_add(self, fingerprinted_dist):
# type: (FingerprintedDistribution) -> Union[_RankedDistribution, _UnrankedDistribution]
filename, ext = os.path.splitext(os.path.basename(fingerprinted_dist.location))
if ext.lower() != ".whl":
filename = os.path.basename(fingerprinted_dist.location)
if not is_wheel(filename):
# This supports resolving pex's own vendored distributions which are vendored in a
# directory with the project name (`pip/` for pip) and not the corresponding wheel name
# (`pip-19.3.1-py2.py3-none-any.whl/` for pip). Pex only vendors universal wheels for
# all platforms it supports at buildtime and runtime so this is always safe.
return _RankedDistribution.highest_rank(fingerprinted_dist)

try:
wheel_tags = CompatibilityTags.from_wheel(fingerprinted_dist.location)
wheel_eval = self._target.wheel_applies(fingerprinted_dist.distribution)
except ValueError:
return _InvalidWheelName(fingerprinted_dist, filename)

# There will be multiple parsed tags for compressed tag sets. Ensure we grab the parsed tag
# with highest rank from that expanded set.
best_match = self._target.supported_tags.best_match(wheel_tags)
if best_match is None:
return _TagMismatch(fingerprinted_dist, wheel_tags)
if not wheel_eval.best_match:
return _TagMismatch(fingerprinted_dist, wheel_eval.tags)
if not wheel_eval.applies:
assert wheel_eval.requires_python
return _PythonRequiresMismatch(fingerprinted_dist, wheel_eval.requires_python)

python_requires = dist_metadata.requires_python(fingerprinted_dist.distribution)
if python_requires and not self._target.requires_python_applies(
python_requires, source=fingerprinted_dist.distribution.as_requirement()
):
return _PythonRequiresMismatch(fingerprinted_dist, python_requires)

return _RankedDistribution(best_match.rank, fingerprinted_dist)
return _RankedDistribution(wheel_eval.best_match.rank, fingerprinted_dist)

def activate(self):
# type: () -> Iterable[Distribution]
Expand Down
8 changes: 4 additions & 4 deletions pex/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,10 +744,10 @@ def iter_map_parallel(
#
input_items.sort(key=costing_function, reverse=True)

# We want each of the job slots above to process MULTIPROCESSING_MIN_AVERAGE_LOAD on average in
# order to overcome multiprocessing overheads. Of course, if there are fewer available cores
# than that or the user has pinned max jobs lower, we clamp to that. Finally, we always want at
# least two slots to ensure we process input items in parallel.
# We want each of the job slots above to process MULTIPROCESSING_DEFAULT_MIN_AVERAGE_LOAD on
# average in order to overcome multiprocessing overheads. Of course, if there are fewer
# available cores than that or the user has pinned max jobs lower, we clamp to that. Finally, we
# always want at least two slots to ensure we process input items in parallel.
pool_size = max(2, min(len(input_items) // min_average_load, _sanitize_max_jobs(max_jobs)))

apply_function = functools.partial(_apply_function, function)
Expand Down
5 changes: 3 additions & 2 deletions pex/pep_425.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import itertools
import os.path

from pex.dist_metadata import is_wheel
from pex.orderedset import OrderedSet
from pex.rank import Rank
from pex.third_party.packaging.tags import Tag, parse_tag
Expand Down Expand Up @@ -56,14 +57,14 @@ class CompatibilityTags(object):
@classmethod
def from_wheel(cls, wheel):
# type: (str) -> CompatibilityTags
wheel_stem, ext = os.path.splitext(os.path.basename(wheel))
if ".whl" != ext:
if not is_wheel(wheel):
raise ValueError(
"Can only calculate wheel tags from a filename that ends in .whl per "
"https://peps.python.org/pep-0427/#file-name-convention, given: {wheel!r}".format(
wheel=wheel
)
)
wheel_stem, _ = os.path.splitext(os.path.basename(wheel))
# Wheel filename format: https://www.python.org/dev/peps/pep-0427/#file-name-convention
# `{distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl`
wheel_components = wheel_stem.rsplit("-", 3)
Expand Down
2 changes: 1 addition & 1 deletion pex/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class VCSScheme(object):


def parse_scheme(scheme):
# type: (str) -> Optional[Union[str, ArchiveScheme.Value, VCSScheme]]
# type: (str) -> Union[str, ArchiveScheme.Value, VCSScheme]
match = re.match(
r"""
^
Expand Down
Loading