Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite direct URL reinstallation logic #10564

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions news/5780.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Reinstallation and upgrade behaviour of direct URL requirements are completely
rewritten to better match user expectation. Now pip will try to parse the URLs
and VCS information (if applicable) from both the incoming direct URLs and the
already-installed PEP 610 metadata to determine whether reinstallation is
needed. If pip guesses wrong, the user can also force pip to always reinstall
direct URL requirments (but not version-specified ones) with ``--upgrade``.
16 changes: 14 additions & 2 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,11 @@ def setuptools_filename(self) -> str:
def direct_url(self) -> Optional[DirectUrl]:
"""Obtain a DirectUrl from this distribution.

Returns None if the distribution has no `direct_url.json` metadata,
or if `direct_url.json` is invalid.
Returns None if the distribution has no ``direct_url.json`` metadata,
or if the ``direct_url.json`` content is invalid.

Note that this may return None for a legacy editable installation. See
also ``specified_by_url``.
"""
try:
content = self.read_text(DIRECT_URL_METADATA_NAME)
Expand Down Expand Up @@ -337,6 +340,15 @@ def requested(self) -> bool:
def editable(self) -> bool:
return bool(self.editable_project_location)

@property
def specified_by_url(self) -> bool:
"""WHether the distribution was originally installed via a direct URL.

This includes cases where the user used a path (since it is a shorthand
and internally treated very similarly as a URL).
"""
return self.direct_url is not None or self.editable

@property
def local(self) -> bool:
"""If distribution is installed in the current virtual environment.
Expand Down
47 changes: 47 additions & 0 deletions src/pip/_internal/models/direct_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ def __init__(
self.requested_revision = requested_revision
self.commit_id = commit_id

def equivalent(self, other: "InfoType") -> bool:
if not isinstance(other, VcsInfo):
return False
return self.vcs == other.vcs and self.commit_id == other.commit_id

@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]:
if d is None:
Expand All @@ -89,6 +94,12 @@ def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]:
requested_revision=_get(d, str, "requested_revision"),
)

def __repr__(self) -> str:
return (
f"VcsInfo(vcs={self.vcs!r}, commit_id={self.commit_id!r}, "
f"requested_revision={self.requested_revision!r})"
)

def _to_dict(self) -> Dict[str, Any]:
return _filter_none(
vcs=self.vcs,
Expand All @@ -106,12 +117,20 @@ def __init__(
) -> None:
self.hash = hash

def equivalent(self, other: "InfoType") -> bool:
if not isinstance(other, ArchiveInfo):
return False
return self.hash == other.hash

@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]:
if d is None:
return None
return cls(hash=_get(d, str, "hash"))

def __repr__(self) -> str:
return f"ArchiveInfo(hash={self.hash!r})"

def _to_dict(self) -> Dict[str, Any]:
return _filter_none(hash=self.hash)

Expand All @@ -125,12 +144,20 @@ def __init__(
) -> None:
self.editable = editable

def equivalent(self, other: "InfoType") -> bool:
if not isinstance(other, DirInfo):
return False
return self.editable == other.editable

@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["DirInfo"]:
if d is None:
return None
return cls(editable=_get_required(d, bool, "editable", default=False))

def __repr__(self) -> str:
return f"DirInfo(editable={self.editable!r})"

def _to_dict(self) -> Dict[str, Any]:
return _filter_none(editable=self.editable or None)

Expand All @@ -149,6 +176,26 @@ def __init__(
self.info = info
self.subdirectory = subdirectory

def equivalent(self, other: Optional["DirectUrl"]) -> bool:
"""Whether two direct URL objects are equivalent.

This is different from ``__eq__`` in that two non-equal infos can be
"logically the same", e.g. two different Git branches cab be equivalent
if they resolve to the same commit.
"""
return (
other is not None
and self.url == other.url
and self.info.equivalent(other.info)
and self.subdirectory == other.subdirectory
)

def __repr__(self) -> str:
return (
f"DirectUrl(url={self.url!r}, info={self.info!r}, "
f"subdirectory={self.subdirectory!r})"
)

def _remove_auth_from_netloc(self, netloc: str) -> str:
if "@" not in netloc:
return netloc
Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ class _CleanResult(NamedTuple):

def _clean_link(link: Link) -> _CleanResult:
parsed = link._parsed_url
netloc = parsed.netloc.rsplit("@", 1)[-1]
netloc, _ = split_auth_from_netloc(parsed.netloc)
# According to RFC 8089, an empty host in file: means localhost.
if parsed.scheme == "file" and not netloc:
netloc = "localhost"
Expand Down
166 changes: 115 additions & 51 deletions src/pip/_internal/resolution/resolvelib/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@
from pip._internal.req.req_install import InstallRequirement
from pip._internal.req.req_set import RequirementSet
from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider
from pip._internal.resolution.resolvelib.provider import PipProvider
from pip._internal.resolution.resolvelib.reporter import (
PipDebuggingReporter,
PipReporter,
)
from pip._internal.utils.direct_url_helpers import direct_url_from_link

from .base import Candidate, Requirement
from .factory import Factory
from .provider import PipProvider
from .reporter import PipDebuggingReporter, PipReporter

if TYPE_CHECKING:
from pip._vendor.resolvelib.resolvers import Result as RLResult
Expand Down Expand Up @@ -67,6 +65,102 @@ def __init__(
self.upgrade_strategy = upgrade_strategy
self._result: Optional[Result] = None

def _get_ireq(
self,
candidate: Candidate,
direct_url_requested: bool,
) -> Optional[InstallRequirement]:
"""Get the InstallRequirement to install for a candidate.

Returning None means the candidate is already satisfied by the current
environment state and does not need to be handled.
"""
ireq = candidate.get_install_requirement()

# No ireq to install (e.g. extra-ed candidate). Skip.
if ireq is None:
return None

# The currently installed distribution of the same identifier.
installed_dist = self.factory.get_dist_to_uninstall(candidate)

if installed_dist is None: # Not installed. Install incoming candidate.
return ireq

# If we return this ireq, it should trigger uninstallation of the
# existing distribution for reinstallation.
ireq.should_reinstall = True

# Reinstall if --force-reinstall is set.
if self.factory.force_reinstall:
return ireq

# The artifact represented by the incoming candidate.
cand_link = candidate.source_link

# The candidate does not point to an artifact. This means the resolver
# has already decided the installed distribution is good enough.
if cand_link is None:
return None

# Whether --upgrade is specified.
upgrade_mode = self.upgrade_strategy != "to-satisfy-only"

# The incoming candidate was produced only from version requirements.
# Reinstall only if...
if not direct_url_requested:
# The currently installed distribution does not satisfy the
# requested version specification.
if installed_dist.version != candidate.version:
return ireq
# The currently installed distribution was from a direct URL, and
# an upgrade is requested.
if upgrade_mode and installed_dist.specified_by_url:
return ireq
return None

# At this point, the incoming candidate was produced from a direct URL.
# Determine whether to upgrade based on flags and whether the installed
# distribution was done via a direct URL.

# Always reinstall a direct candidate if it's on the local file system.
if cand_link.is_file:
return ireq

# Reinstall if --upgrade is specified.
if upgrade_mode:
return ireq

# The incoming candidate was produced from a direct URL, but the
# currently installed distribution was not. Always reinstall to be sure.
if not installed_dist.specified_by_url:
return ireq

# Editable candidate always triggers reinstallation.
if candidate.is_editable:
return ireq

# The currently installed distribution is editable, but the incoming
# candidate is not. Uninstall the editable one to match.
if installed_dist.editable:
return ireq
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I continue pondering this topic and I tried installing an editable then the same from index.
I was expecting to hit this line, but it did not, because candidate.get_install_requirement() returns None which makes _get_ireq exit immediately. Instead we get "Requirement already satisfied".

Copy link
Member

@pradyunsg pradyunsg Jun 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By "from index", you mean a pip install -e . installing a package named coffee and a pip install coffee returning Requirement already statisfied and no reinstall?

If yes, that's the behaviour I'd expect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By "from index", you mean a pip install -e . installing a package named coffee and a pip install coffee returning Requirement already statisfied and no reinstall?

That is what I meant, yes.

If yes, that's the behaviour I'd expect.

Fair. But that is not the behaviour described in the comment at line 139.

Copy link
Member Author

@uranusjr uranusjr Nov 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But that is not the behaviour described in the comment at line 139.

The behaviour described on line 139 is correct, because on line 113 we already established the incoming candidate at this point is URL-specified. The scenario you described would fall into the block around line 108.

This block can only be triggered by commands like this:

$ pip install -e .
$ pip install .  # Or any kind of non-editable URL specification.


# Now we know both the installed distribution and incoming candidate
# are based on direct URLs, and neither are editable. Don't reinstall
# if the direct URLs match. Note that there's a special case for VCS: a
# "unresolved" reference (e.g. branch) needs to be fully resolved for
# comparison, so an updated remote branch can trigger reinstallation.
# This is handled by the 'equivalent' implementation.
cand_direct_url = direct_url_from_link(
cand_link,
ireq.source_dir,
ireq.original_link_is_in_wheel_cache,
)
if cand_direct_url.equivalent(installed_dist.direct_url):
return None

return ireq

def resolve(
self, root_reqs: List[InstallRequirement], check_supported_wheels: bool
) -> RequirementSet:
Expand Down Expand Up @@ -101,59 +195,29 @@ def resolve(
raise error from e

req_set = RequirementSet(check_supported_wheels=check_supported_wheels)
for candidate in result.mapping.values():
ireq = candidate.get_install_requirement()
if ireq is None:
continue
for identifier, candidate in result.mapping.items():
# Whether the candidate was resolved from direct URL requirements.
direct_url_requested = any(
requirement.get_candidate_lookup()[0] is not None
for requirement in result.criteria[identifier].iter_requirement()
)

# Check if there is already an installation under the same name,
# and set a flag for later stages to uninstall it, if needed.
installed_dist = self.factory.get_dist_to_uninstall(candidate)
if installed_dist is None:
# There is no existing installation -- nothing to uninstall.
ireq.should_reinstall = False
elif self.factory.force_reinstall:
# The --force-reinstall flag is set -- reinstall.
ireq.should_reinstall = True
elif installed_dist.version != candidate.version:
# The installation is different in version -- reinstall.
ireq.should_reinstall = True
elif candidate.is_editable or installed_dist.editable:
# The incoming distribution is editable, or different in
# editable-ness to installation -- reinstall.
ireq.should_reinstall = True
elif candidate.source_link and candidate.source_link.is_file:
# The incoming distribution is under file://
if candidate.source_link.is_wheel:
# is a local wheel -- do nothing.
logger.info(
"%s is already installed with the same version as the "
"provided wheel. Use --force-reinstall to force an "
"installation of the wheel.",
ireq.name,
)
continue

# is a local sdist or path -- reinstall
ireq.should_reinstall = True
else:
ireq = self._get_ireq(candidate, direct_url_requested)
if ireq is None:
continue

link = candidate.source_link
if link and link.is_yanked:
# The reason can contain non-ASCII characters, Unicode
# is required for Python 2.
msg = (
reason = link.yanked_reason or "<none given>"
logger.warning(
"The candidate selected for download or install is a "
"yanked version: {name!r} candidate (version {version} "
"at {link})\nReason for being yanked: {reason}"
).format(
name=candidate.name,
version=candidate.version,
link=link,
reason=link.yanked_reason or "<none given>",
"yanked version: %r candidate (version %s at %s)\n"
"Reason for being yanked: %s",
candidate.name,
candidate.version,
link,
reason,
)
logger.warning(msg)

req_set.add_named_requirement(ireq)

Expand Down
47 changes: 47 additions & 0 deletions tests/functional/test_install_direct_url.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pathlib

import pytest

from pip._internal.models.direct_url import VcsInfo
Expand Down Expand Up @@ -63,3 +65,48 @@ def test_install_vcs_constraint_direct_file_url(script: PipTestEnvironment) -> N
constraints_file.write_text(f"git+{url}#egg=testpkg")
result = script.pip("install", "testpkg", "-c", constraints_file)
assert get_created_direct_url(result, "testpkg")


@pytest.mark.network
@pytest.mark.usefixtures("with_wheel")
def test_reinstall_vcs_does_not_modify(script: PipTestEnvironment) -> None:
url = "pip-test-package @ git+https://github.com/pypa/pip-test-package@master"
script.pip("install", "--no-cache-dir", url)

result = script.pip("install", url)
assert "Preparing " in result.stdout, str(result) # Should build.
assert "Installing " not in result.stdout, str(result) # But not install.


@pytest.mark.network
@pytest.mark.usefixtures("with_wheel")
def test_reinstall_cached_vcs_does_modify(
script: PipTestEnvironment, tmp_path: pathlib.Path
) -> None:
# Populate the wheel cache.
script.pip(
"wheel",
"--cache-dir",
tmp_path.joinpath("cache").as_posix(),
"--wheel-dir",
tmp_path.joinpath("wheelhouse").as_posix(),
"pip-test-package @ git+https://github.com/pypa/pip-test-package"
"@5547fa909e83df8bd743d3978d6667497983a4b7",
)
# Install a version from git.
script.pip(
"install",
"--cache-dir",
tmp_path.joinpath("cache").as_posix(),
"pip-test-package @ git+https://github.com/pypa/pip-test-package@0.1.1",
)
# Install the same version but from a different commit for which we have the wheel
# in cache, and verify that it does reinstall.
result = script.pip(
"install",
"--cache-dir",
tmp_path.joinpath("cache").as_posix(),
"pip-test-package @ git+https://github.com/pypa/pip-test-package"
"@5547fa909e83df8bd743d3978d6667497983a4b7",
)
assert "Installing " in result.stdout, str(result) # Should install.
Loading