Skip to content

Commit

Permalink
Get hashes from PyPI JSON API
Browse files Browse the repository at this point in the history
  • Loading branch information
atugushev committed Apr 21, 2020
1 parent a33e653 commit 82d093f
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 8 deletions.
98 changes: 91 additions & 7 deletions piptools/repositories/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pip._internal.cache import WheelCache
from pip._internal.commands import create_command
from pip._internal.models.index import PyPI
from pip._internal.models.index import PackageIndex, PyPI
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.req import RequirementSet
Expand All @@ -18,12 +18,14 @@
from pip._internal.utils.misc import normalize_path
from pip._internal.utils.temp_dir import TempDirectory, global_tempdir_manager
from pip._internal.utils.urls import path_to_url, url_to_path
from pip._vendor.requests import RequestException

from .._compat import PIP_VERSION, TemporaryDirectory, contextlib
from ..click import progressbar
from ..exceptions import NoCandidateFound
from ..logging import log
from ..utils import (
as_tuple,
fs_str,
is_pinned_requirement,
is_url_requirement,
Expand Down Expand Up @@ -227,6 +229,47 @@ def get_dependencies(self, ireq):

return self._dependencies_cache[ireq]

def _get_project(self, ireq):
    """
    Return a dict of a project info from PyPI JSON API for a given
    InstallRequirement. Return None on HTTP/JSON error or if a package
    is not found on any of the configured index servers.

    Every index URL of the finder's search scope is tried in order and the
    payload of the first successful, JSON-decodable response is returned.

    API reference: https://warehouse.readthedocs.io/api-reference/json/
    """
    package_indexes = (
        PackageIndex(url=index_url, file_storage_domain="")
        for index_url in self.finder.search_scope.index_urls
    )
    for package_index in package_indexes:
        url = "{url}/{name}/json".format(url=package_index.pypi_url, name=ireq.name)
        try:
            response = self.session.get(url)
        except RequestException as e:
            log.debug(
                "Fetch package info from PyPI failed: {url}: {e}".format(
                    url=url, e=e
                )
            )
            continue

        # Skip this PyPI server, because there is no package
        # or JSON API might be not supported
        if response.status_code == 404:
            continue

        # Any other client/server error (401, 403, 500, ...) must not be
        # mistaken for project data even if its body is valid JSON;
        # otherwise the remaining index servers would never be consulted.
        if response.status_code >= 400:
            log.debug(
                "Fetch package info from PyPI failed: {url}: HTTP {status}".format(
                    url=url, status=response.status_code
                )
            )
            continue

        try:
            data = response.json()
        except ValueError as e:
            log.debug(
                "Cannot parse JSON response from PyPI: {url}: {e}".format(
                    url=url, e=e
                )
            )
            continue
        return data
    return None

def get_hashes(self, ireq):
"""
Given an InstallRequirement, return a set of hashes that represent all
Expand Down Expand Up @@ -257,6 +300,50 @@ def get_hashes(self, ireq):
if not is_pinned_requirement(ireq):
raise TypeError("Expected pinned requirement, got {}".format(ireq))

log.debug("{}".format(ireq.name))

with log.indentation():
hashes = self._get_hashes_from_pypi(ireq)
if hashes is None:
log.log("Couldn't get hashes from PyPI, fallback to hashing files")
return self._get_hashes_from_files(ireq)

return hashes

def _get_hashes_from_pypi(self, ireq):
    """
    Return a set of hashes from PyPI JSON API for a given InstallRequirement.

    Return None if fetching data failed, if the JSON payload does not have
    the expected shape, or if no digest is available for the pinned
    version. The caller treats None as "fall back to another source".
    """
    project = self._get_project(ireq)
    if project is None:
        return None

    # Only the version component of the pinned requirement is needed here.
    _, version, _ = as_tuple(ireq)

    try:
        release_files = project["releases"][version]
    except (KeyError, TypeError):
        # TypeError covers payloads that are not a mapping at all
        # (e.g. a JSON list or ``"releases": null``).
        log.debug("Missing release files on PyPI")
        return None

    try:
        hashes = {
            "{algo}:{digest}".format(
                algo=FAVORITE_HASH, digest=file_["digests"][FAVORITE_HASH]
            )
            for file_ in release_files
        }
    except (KeyError, TypeError):
        # TypeError covers a non-iterable file list or non-mapping entries.
        log.debug("Missing digests of release files on PyPI")
        return None

    # A release without any files yields no digests; report that as a
    # failure instead of an empty (and therefore useless) set of hashes.
    if not hashes:
        log.debug("Missing digests of release files on PyPI")
        return None

    return hashes

def _get_hashes_from_files(self, ireq):
"""
Return a set of hashes for all release files of a given InstallRequirement.
"""
# We need to get all of the candidates that match our current version
# pin, these will represent all of the files that could possibly
# satisfy this constraint.
Expand All @@ -267,12 +354,9 @@ def get_hashes(self, ireq):
)
matching_candidates = candidates_by_version[matching_versions[0]]

log.debug(ireq.name)

with log.indentation():
return {
self._get_file_hash(candidate.link) for candidate in matching_candidates
}
return {
self._get_file_hash(candidate.link) for candidate in matching_candidates
}

def _get_file_hash(self, link):
log.debug("Hashing {}".format(link.url_without_fragment))
Expand Down
137 changes: 136 additions & 1 deletion tests/test_repository_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url
from pip._vendor.requests import Session
from pip._vendor.requests import HTTPError, Session

from piptools._compat import PIP_VERSION
from piptools.repositories import PyPIRepository
Expand Down Expand Up @@ -166,3 +166,138 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir):
)

assert not pypi_repository.options.cache_dir


@pytest.mark.parametrize(
    ("project_data", "expected_hashes"),
    [
        pytest.param(
            {"releases": {"0.1": [{"digests": {"sha256": "fake-hash"}}]}},
            {"sha256:fake-hash"},
            id="return single hash",
        ),
        pytest.param(
            {
                "releases": {
                    "0.1": [
                        {"digests": {"sha256": "fake-hash-number1"}},
                        {"digests": {"sha256": "fake-hash-number2"}},
                    ]
                }
            },
            {"sha256:fake-hash-number1", "sha256:fake-hash-number2"},
            id="return multiple hashes",
        ),
        pytest.param(None, None, id="not found project data"),
        pytest.param({}, None, id="not found releases key"),
        pytest.param({"releases": {}}, None, id="not found version"),
        pytest.param({"releases": {"0.1": [{}]}}, None, id="not found digests"),
        pytest.param(
            {"releases": {"0.1": [{"digests": {}}]}}, None, id="digests are empty"
        ),
        pytest.param(
            {"releases": {"0.1": [{"digests": {"md5": "fake-hash"}}]}},
            None,
            id="not found sha256 algo",
        ),
    ],
)
def test_get_hashes_from_pypi(from_line, tmpdir, project_data, expected_hashes):
    """
    Check that PyPIRepository._get_hashes_from_pypi() turns the project data
    into the expected set of hashes, or into None when data is unusable.
    """

    # Replace the network-backed lookup with the parametrized payload.
    class StubbedRepository(PyPIRepository):
        def _get_project(self, ireq):
            return project_data

    repository = StubbedRepository(
        ["--no-cache-dir"], cache_dir=str(tmpdir / "pypi-repo-cache")
    )
    pinned_ireq = from_line("fake-package==0.1")

    assert repository._get_hashes_from_pypi(pinned_ireq) == expected_hashes


def test_get_project__returns_data(from_line, tmpdir, monkeypatch, pypi_repository):
    """
    Check that PyPIRepository._get_project() hands back the JSON payload
    of a successful response.
    """
    payload = {"releases": {"0.1": [{"digests": {"sha256": "fake-hash"}}]}}

    class FakeResponse:
        status_code = 200

        @staticmethod
        def json():
            return payload

    # Every request made through the session gets the canned response.
    monkeypatch.setattr(
        pypi_repository.session, "get", lambda *args, **kwargs: FakeResponse()
    )

    pinned_ireq = from_line("fake-package==0.1")
    assert pypi_repository._get_project(pinned_ireq) == payload


def test_get_project__handles_http_error(
    from_line, tmpdir, monkeypatch, pypi_repository
):
    """
    Check that PyPIRepository._get_project() yields None when the session
    raises an HTTP error.
    """

    def failing_get(*args, **kwargs):
        raise HTTPError("test http error")

    monkeypatch.setattr(pypi_repository.session, "get", failing_get)

    pinned_ireq = from_line("fake-package==0.1")
    assert pypi_repository._get_project(pinned_ireq) is None


def test_get_project__handles_json_decode_error(
    from_line, tmpdir, monkeypatch, pypi_repository
):
    """
    Check that PyPIRepository._get_project() yields None when the response
    body cannot be decoded as JSON.
    """

    class UndecodableResponse:
        status_code = 200

        @staticmethod
        def json():
            raise ValueError("test json error")

    monkeypatch.setattr(
        pypi_repository.session, "get", lambda *args, **kwargs: UndecodableResponse()
    )

    pinned_ireq = from_line("fake-package==0.1")
    assert pypi_repository._get_project(pinned_ireq) is None


def test_get_project__handles_404(from_line, tmpdir, monkeypatch, pypi_repository):
    """
    Check that PyPIRepository._get_project() yields None when the server
    answers with status code 404.
    """

    class NotFoundResponse:
        status_code = 404

    monkeypatch.setattr(
        pypi_repository.session, "get", lambda *args, **kwargs: NotFoundResponse()
    )

    pinned_ireq = from_line("fake-package==0.1")
    assert pypi_repository._get_project(pinned_ireq) is None

0 comments on commit 82d093f

Please sign in to comment.