Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Performance of Picking Best Candidate from Indexes #9748

Merged
merged 12 commits into from
Apr 3, 2021
Merged
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ tests/data/common_wheels/

# Mac
.DS_Store

# Profiling related artifacts
*.prof
1 change: 1 addition & 0 deletions news/9748.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve performance when picking the best file from indexes during `pip install`.
21 changes: 14 additions & 7 deletions src/pip/_internal/index/package_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,12 @@ def __init__(
self._project_name = project_name
self._specifier = specifier
self._supported_tags = supported_tags
# Since the index of the tag in the _supported_tags list is used
# as a priority, precompute a map from tag to index/priority to be
# used in wheel.find_most_preferred_tag.
self._wheel_tag_preferences = {
tag: idx for idx, tag in enumerate(supported_tags)
}

def get_applicable_candidates(
self,
Expand Down Expand Up @@ -470,7 +476,6 @@ def get_applicable_candidates(
hashes=self._hashes,
project_name=self._project_name,
)

return sorted(filtered_applicable_candidates, key=self._sort_key)

def _sort_key(self, candidate):
Expand Down Expand Up @@ -512,14 +517,17 @@ def _sort_key(self, candidate):
if link.is_wheel:
# can raise InvalidWheelFilename
wheel = Wheel(link.filename)
if not wheel.supported(valid_tags):
if self._prefer_binary:
binary_preference = 1
pradyunsg marked this conversation as resolved.
Show resolved Hide resolved
try:
pri = -(wheel.find_most_preferred_tag(
valid_tags, self._wheel_tag_preferences
))
except ValueError:
raise UnsupportedWheel(
"{} is not a supported wheel for this platform. It "
"can't be sorted.".format(wheel.filename)
)
if self._prefer_binary:
binary_preference = 1
pri = -(wheel.support_index_min(valid_tags))
if wheel.build_tag is not None:
match = re.match(r'^(\d+)(.*)$', wheel.build_tag)
build_tag_groups = match.groups()
Expand All @@ -544,8 +552,7 @@ def sort_best_candidate(
"""
if not candidates:
return None
best_candidate = max(candidates, key=self._sort_key)
return best_candidate
return max(candidates, key=self._sort_key)
pradyunsg marked this conversation as resolved.
Show resolved Hide resolved

def compute_best_candidate(
self,
Expand Down
27 changes: 24 additions & 3 deletions src/pip/_internal/models/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name that have meaning.
"""
import re
from typing import List
from typing import Any, Dict, List, Set, Union

from pip._vendor.packaging.tags import Tag

Expand Down Expand Up @@ -66,10 +66,31 @@ def support_index_min(self, tags):
"""
return min(tags.index(tag) for tag in self.file_tags if tag in tags)

def find_most_preferred_tag(self, tags, tag_to_priority):
    # type: (List[Tag], Dict[Tag, int]) -> int
    """Return the priority of the most preferred tag that one of the wheel's
    file tag combinations achieves in the given list of supported tags, using
    the given tag_to_priority mapping, where lower priorities are
    more-preferred.

    This is used in place of support_index_min in some cases in order to
    avoid an expensive linear scan of a large list of tags.

    :param tags: the PEP 425 tags to check the wheel against. Only
        tag_to_priority is consulted for the lookup; this parameter is kept
        so the signature parallels support_index_min.
    :param tag_to_priority: a mapping from tag to priority of that tag, where
        lower is more preferred.

    :raises ValueError: If none of the wheel's file tags match one of
        the supported tags.
    """
    # Dict membership and lookup are constant-time per file tag, which is
    # what avoids the list.index() scan done by support_index_min.
    priorities = [
        tag_to_priority[tag]
        for tag in self.file_tags
        if tag in tag_to_priority
    ]
    if not priorities:
        # Raise the documented ValueError explicitly instead of relying on
        # min() failing on an empty sequence with an unrelated message.
        raise ValueError(
            "none of the wheel's file tags match one of the supported tags"
        )
    return min(priorities)

def supported(self, tags):
# type: (List[Tag]) -> bool
# type: (Iterable[Tag]) -> bool
"""Return whether the wheel is compatible with one of the given tags.

:param tags: the PEP 425 tags to check the wheel against.
"""
return not self.file_tags.isdisjoint(tags)
for itag in self.file_tags:
if itag in tags:
return True
return False
jbylund marked this conversation as resolved.
Show resolved Hide resolved