From e41bf0230e7c5bf58e17692ee2880ff775a164f7 Mon Sep 17 00:00:00 2001
From: Marcus Smith <qwcode@gmail.com>
Date: Sat, 1 Feb 2014 11:41:55 -0800
Subject: [PATCH 1/2] 'pip wheel' should download wheels when it finds them

---
 pip/commands/wheel.py          | 11 ++--
 pip/download.py                | 78 +++++++++++++++++++++++------
 pip/req/req_set.py             | 84 +++++++++++++++++++++++--------
 pip/wheel.py                   | 14 +++---
 tests/functional/test_wheel.py | 17 ++++++-
 tests/unit/test_download.py    | 91 +++++++++++++++++++++++++++++++++-
 6 files changed, 241 insertions(+), 54 deletions(-)

diff --git a/pip/commands/wheel.py b/pip/commands/wheel.py
index edf3ef7ad6b..757891a5b52 100644
--- a/pip/commands/wheel.py
+++ b/pip/commands/wheel.py
@@ -165,13 +165,15 @@ def run(self, options, args):
             ignore_dependencies=options.ignore_dependencies,
             ignore_installed=True,
             session=session,
+            wheel_download_dir=options.wheel_dir
         )

+        # make the wheelhouse
+        if not os.path.exists(options.wheel_dir):
+            os.makedirs(options.wheel_dir)
+
         #parse args and/or requirements files
         for name in args:
-            if name.endswith(".whl"):
-                logger.notify("ignoring %s" % name)
-                continue
             requirement_set.add_requirement(
                 InstallRequirement.from_line(name, None))

@@ -181,8 +183,7 @@ def run(self, options, args):
                 finder=finder,
                 options=options,
                 session=session):
-            if (req.editable
-                    or (req.name is None and req.url.endswith(".whl"))):
+            if req.editable:
                 logger.notify("ignoring %s" % req.url)
                 continue
             requirement_set.add_requirement(req)
diff --git a/pip/download.py b/pip/download.py
index a7fd71c73df..a9169842acd 100644
--- a/pip/download.py
+++ b/pip/download.py
@@ -348,18 +348,6 @@ def unpack_vcs_link(link, location, only_download=False):
     vcs_backend.unpack(location)


-def unpack_file_url(link, location):
-    source = url_to_path(link.url)
-    content_type = mimetypes.guess_type(source)[0]
-    if os.path.isdir(source):
-        # delete the location since shutil will create it again :(
-        if os.path.isdir(location):
-            rmtree(location)
-        shutil.copytree(source, location, symlinks=True)
-    else:
-        unpack_file(source, location, content_type, link)
-
-
 def _get_used_vcs_backend(link):
     for backend in vcs.backends:
         if link.scheme in backend.schemes:
@@ -507,7 +495,6 @@ def _copy_file(filename, location, content_type, link):
             shutil.move(download_location, dest_file)
     if copy:
         shutil.copy(filename, download_location)
-    logger.indent -= 2
     logger.notify('Saved %s' % display_path(download_location))
@@ -519,11 +506,12 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
     temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
     temp_location = None
     target_url = link.url.split('#', 1)[0]
-    already_cached = False
     cache_file = None
     cache_content_type_file = None
     download_hash = None
+
+    # If a download cache is specified, is the file cached there?
     if download_cache:
         cache_file = os.path.join(
             download_cache,
@@ -537,12 +525,14 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
         if not os.path.isdir(download_cache):
             create_download_cache_folder(download_cache)

+    # If a download dir is specified, is the file already downloaded there?
     already_downloaded = None
     if download_dir:
         already_downloaded = os.path.join(download_dir, link.filename)
         if not os.path.exists(already_downloaded):
             already_downloaded = None

+    # If already downloaded, does its hash match?
     if already_downloaded:
         temp_location = already_downloaded
         content_type = mimetypes.guess_type(already_downloaded)[0]
@@ -560,8 +550,7 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
                 os.unlink(already_downloaded)
                 already_downloaded = None

-    # We have a cached file, and we haven't already found a good downloaded
-    # copy
+    # If not a valid download, let's confirm the cached file is valid
     if already_cached and not temp_location:
         with open(cache_content_type_file) as fp:
             content_type = fp.read().strip()
@@ -582,6 +571,7 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
             already_cached = False

     # We don't have either a cached or a downloaded copy
+    # let's download to a tmp dir
     if not temp_location:
         try:
             resp = session.get(target_url, stream=True)
@@ -614,11 +604,67 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
     if link.hash and link.hash_name:
         _check_hash(download_hash, link)
+    # a download dir is specified; let's copy the archive there
     if download_dir and not already_downloaded:
         _copy_file(temp_location, download_dir, content_type, link)
+
+    # unpack the archive to the build dir location. Even when only downloading
+    # archives, they have to be unpacked to parse dependencies
     unpack_file(temp_location, location, content_type, link)
+
+    # if using a download cache, cache the archive there if needed
     if cache_file and not already_cached:
         cache_download(cache_file, temp_location, content_type)
+
     if not (already_cached or already_downloaded):
         os.unlink(temp_location)
     os.rmdir(temp_dir)
+
+
+def unpack_file_url(link, location, download_dir=None):
+
+    link_path = url_to_path(link.url_without_fragment)
+    from_path = None
+    already_downloaded = False
+
+    # If it's a url to a local directory
+    if os.path.isdir(link_path):
+        if os.path.isdir(location):
+            rmtree(location)
+        shutil.copytree(link_path, location, symlinks=True)
+        return
+
+    # If a download dir is specified, is the file already there and valid?
+    if download_dir:
+        download_path = os.path.join(download_dir, link.filename)
+        if os.path.exists(download_path):
+            content_type = mimetypes.guess_type(download_path)[0]
+            logger.notify('File was already downloaded %s' % download_path)
+            if link.hash:
+                download_hash = _get_hash_from_file(download_path, link)
+                try:
+                    _check_hash(download_hash, link)
+                    already_downloaded = True
+                except HashMismatch:
+                    logger.warn(
+                        'Previously-downloaded file %s has bad hash, '
+                        're-downloading.' % download_path
+                    )
+                    os.unlink(download_path)
+            else:
+                already_downloaded = True
+
+    # a download dir is specified and not already downloaded
+    if download_dir and not already_downloaded:
+        content_type = mimetypes.guess_type(link_path)[0]
+        _copy_file(link_path, download_dir, content_type, link)
+
+    # unpack the archive to the build dir location. Even when only downloading
+    # archives, they have to be unpacked to parse dependencies
+    if already_downloaded:
+        from_path = download_path
+    else:
+        from_path = link_path
+    content_type = mimetypes.guess_type(from_path)[0]
+    unpack_file(from_path, location, content_type, link)
diff --git a/pip/req/req_set.py b/pip/req/req_set.py
index e398e5072af..3391e6f7d64 100644
--- a/pip/req/req_set.py
+++ b/pip/req/req_set.py
@@ -9,14 +9,14 @@
 from pip.exceptions import (InstallationError, BestVersionAlreadyInstalled,
                             DistributionNotFound, PreviousBuildDirError)
 from pip.index import Link
-from pip.locations import (
-    PIP_DELETE_MARKER_FILENAME, write_delete_marker_file, build_prefix,
-)
+from pip.locations import (PIP_DELETE_MARKER_FILENAME, build_prefix,
+                           write_delete_marker_file)
 from pip.log import logger
 from pip.req.req_install import InstallRequirement
 from pip.util import (display_path, rmtree, dist_in_usersite,
                       call_subprocess, _make_build_dir)
 from pip.vcs import vcs
+from pip.wheel import wheel_ext


 class Requirements(object):
@@ -53,10 +53,12 @@ def __init__(self, build_dir, src_dir, download_dir, download_cache=None,
                  upgrade=False, ignore_installed=False, as_egg=False,
                  target_dir=None, ignore_dependencies=False,
                  force_reinstall=False, use_user_site=False, session=None,
-                 pycompile=True):
+                 pycompile=True, wheel_download_dir=None):
         self.build_dir = build_dir
         self.src_dir = src_dir
         self.download_dir = download_dir
+        if download_cache:
+            download_cache = os.path.expanduser(download_cache)
         self.download_cache = download_cache
         self.upgrade = upgrade
         self.ignore_installed = ignore_installed
@@ -74,6 +76,7 @@ def __init__(self, build_dir, src_dir, download_dir, download_cache=None,
         self.target_dir = target_dir  # set from --target option
         self.session = session or PipSession()
         self.pycompile = pycompile
+        self.wheel_download_dir = wheel_download_dir

     def __str__(self):
         reqs = [req for req in self.requirements.values()
@@ -209,6 +212,11 @@ def prepare_files(self, finder, force_root_egg_info=False, bundle=False):
             install = True
             best_installed = False
             not_found = None
+
+            ###############################################
+            ## Search for archive to fulfill requirement ##
+            ###############################################
+
             if not self.ignore_installed and not req_to_install.editable:
                 req_to_install.check_if_exists()
                 if req_to_install.satisfied_by:
@@ -258,6 +266,11 @@ def prepare_files(self, finder, force_root_egg_info=False, bundle=False):
             else:
                 logger.notify('Downloading/unpacking %s' % req_to_install)
             logger.indent += 2
+
+            ##################################
+            ## vcs update or unpack archive ##
+            ##################################
+
             try:
                 is_bundle = False
                 is_wheel = False
@@ -323,9 +336,21 @@ def prepare_files(self, finder, force_root_egg_info=False, bundle=False):
                         assert url
                     if url:
                         try:
+
+                            if (
+                                url.filename.endswith(wheel_ext)
+                                and self.wheel_download_dir
+                            ):
+                                # when doing 'pip wheel'
+                                download_dir = self.wheel_download_dir
+                                do_download = True
+                            else:
+                                download_dir = self.download_dir
+                                do_download = self.is_download
                             self.unpack_url(
-                                url, location, self.is_download,
-                            )
+                                url, location, download_dir,
+                                do_download,
+                            )
                         except HTTPError as exc:
                             logger.fatal(
                                 'Could not install requirement %s because '
@@ -340,7 +365,7 @@ def prepare_files(self, finder, force_root_egg_info=False, bundle=False):
                     unpack = False
             if unpack:
                 is_bundle = req_to_install.is_bundle
-                is_wheel = url and url.filename.endswith('.whl')
+                is_wheel = url and url.filename.endswith(wheel_ext)
                 if is_bundle:
                     req_to_install.move_bundle_files(
                         self.build_dir,
@@ -356,6 +381,11 @@ def prepare_files(self, finder, force_root_egg_info=False, bundle=False):
                             req_to_install.run_egg_info()
                         if url and url.scheme in vcs.all_schemes:
                             req_to_install.archive(self.download_dir)
+
+                    ##############################
+                    ## parse wheel dependencies ##
+                    ##############################
+
                     elif is_wheel:
                         req_to_install.source_dir = location
                         req_to_install.url = url.url
@@ -413,6 +443,11 @@ def prepare_files(self, finder, force_root_egg_info=False, bundle=False):
                             req_to_install
                         )
                         install = False
+
+                ##############################
+                ## parse sdist dependencies ##
+                ##############################
+
                 if not (is_bundle or is_wheel):
                     ## FIXME: shouldn't be globally added:
                     finder.add_dependency_links(
@@ -503,29 +538,36 @@ def copy_to_build_dir(self, req_to_install):
         call_subprocess(["python", "%s/setup.py" % dest, "clean"], cwd=dest,
                         command_desc='python setup.py clean')

-    def unpack_url(self, link, location, only_download=False):
-        if only_download:
-            loc = self.download_dir
-        else:
-            loc = location
+    def unpack_url(self, link, location, download_dir=None,
+                   only_download=False):
+        if download_dir is None:
+            download_dir = self.download_dir
+
+        # non-editable vcs urls
         if is_vcs_url(link):
-            return unpack_vcs_link(link, loc, only_download)
-        # a local file:// index could have links with hashes
-        elif not link.hash and is_file_url(link):
-            return unpack_file_url(link, loc)
+            if only_download:
+                loc = download_dir
+            else:
+                loc = location
+            unpack_vcs_link(link, loc, only_download)
+
+        # file urls
+        elif is_file_url(link):
+            unpack_file_url(link, location, download_dir)
+            if only_download:
+                write_delete_marker_file(location)
+
+        # http urls
         else:
-            if self.download_cache:
-                self.download_cache = os.path.expanduser(self.download_cache)
-            retval = unpack_http_url(
+            unpack_http_url(
                 link,
                 location,
                 self.download_cache,
-                self.download_dir,
+                download_dir,
                 self.session,
             )
             if only_download:
                 write_delete_marker_file(location)
-            return retval

     def install(self, install_options, global_options=(), *args, **kwargs):
         """
diff --git a/pip/wheel.py b/pip/wheel.py
index 849cb41b591..58148a09f33 100644
--- a/pip/wheel.py
+++ b/pip/wheel.py
@@ -487,21 +487,19 @@ def build(self):

         reqset = self.requirement_set.requirements.values()

-        #make the wheelhouse
-        if not os.path.exists(self.wheel_dir):
-            os.makedirs(self.wheel_dir)
+        buildset = [req for req in reqset if not req.is_wheel]
+
+        if not buildset:
+            return

         #build the wheels
         logger.notify(
             'Building wheels for collected packages: %s' %
-            ','.join([req.name for req in reqset])
+            ','.join([req.name for req in buildset])
         )
         logger.indent += 2
         build_success, build_failure = [], []
-        for req in reqset:
-            if req.is_wheel:
-                logger.notify("Skipping building wheel: %s", req.url)
-                continue
+        for req in buildset:
             if self._build_one(req):
                 build_success.append(req)
             else:
diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py
index f7344e81258..98e61db3489 100644
--- a/tests/functional/test_wheel.py
+++ b/tests/functional/test_wheel.py
@@ -36,6 +36,20 @@ def test_pip_wheel_success(script, data):
     assert "Successfully built simple" in result.stdout, result.stdout


+def test_pip_wheel_downloads_wheels(script, data):
+    """
+    Test 'pip wheel' downloads wheels
+    """
+    script.pip('install', 'wheel')
+    result = script.pip(
+        'wheel', '--no-index', '-f', data.find_links, 'simple.dist',
+    )
+    wheel_file_name = 'simple.dist-0.1-py2.py3-none-any.whl'
+    wheel_file_path = script.scratch/'wheelhouse'/wheel_file_name
+    assert wheel_file_path in result.files_created, result.stdout
+    assert "Saved" in result.stdout, result.stdout
+
+
 def test_pip_wheel_fail(script, data):
     """
     Test 'pip wheel' failure.
     """
@@ -56,7 +70,7 @@ def test_pip_wheel_fail(script, data):

 def test_pip_wheel_ignore_wheels_editables(script, data):
     """
-    Test 'pip wheel' ignores editables and *.whl files in requirements
+    Test 'pip wheel' ignores editables
     """
     script.pip('install', 'wheel')

@@ -79,7 +93,6 @@ def test_pip_wheel_ignore_wheels_editables(script, data):
     )
     assert "Successfully built simple" in result.stdout, result.stdout
     assert "Failed to build" not in result.stdout, result.stdout
-    assert "ignoring %s" % local_wheel in result.stdout
     ignore_editable = "ignoring %s" % path_to_url(local_editable)
     #TODO: understand this divergence
     if sys.platform == 'win32':
diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py
index 8dcfdfa5078..5ee5b2953bf 100644
--- a/tests/unit/test_download.py
+++ b/tests/unit/test_download.py
@@ -1,6 +1,6 @@
 import hashlib
 import os
-from shutil import rmtree
+from shutil import rmtree, copy
 from tempfile import mkdtemp

 from mock import Mock, patch
@@ -8,7 +8,8 @@

 import pip
 from pip.backwardcompat import urllib, BytesIO, b, pathname2url
-from pip.download import PipSession, path_to_url, unpack_http_url, url_to_path
+from pip.download import (PipSession, path_to_url, unpack_http_url,
+                          url_to_path, unpack_file_url)
 from pip.index import Link
@@ -182,3 +183,89 @@ def test_path_to_url_win():
 @pytest.mark.skipif("sys.platform != 'win32'")
 def test_url_to_path_win():
     assert url_to_path('file:///c:/tmp/file') == 'c:/tmp/file'
+
+
+class Test_unpack_file_url(object):
+
+    def prep(self, tmpdir, data):
+        self.build_dir = tmpdir.join('build')
+        self.download_dir = tmpdir.join('download')
+        os.mkdir(self.build_dir)
+        os.mkdir(self.download_dir)
+        self.dist_file = "simple-1.0.tar.gz"
+        self.dist_file2 = "simple-2.0.tar.gz"
+        self.dist_path = data.packages.join(self.dist_file)
+        self.dist_path2 = data.packages.join(self.dist_file2)
+        self.dist_url = Link(path_to_url(self.dist_path))
+        self.dist_url2 = Link(path_to_url(self.dist_path2))
+
+    def test_unpack_file_url_no_download(self, tmpdir, data):
+        self.prep(tmpdir, data)
+        unpack_file_url(self.dist_url, self.build_dir)
+        assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
+        assert not os.path.isfile(
+            os.path.join(self.download_dir, self.dist_file))
+
+    def test_unpack_file_url_and_download(self, tmpdir, data):
+        self.prep(tmpdir, data)
+        unpack_file_url(self.dist_url, self.build_dir,
+                        download_dir=self.download_dir)
+        assert os.path.isdir(os.path.join(self.build_dir, 'simple'))
+        assert os.path.isfile(os.path.join(self.download_dir, self.dist_file))
+
+    def test_unpack_file_url_download_already_exists(self, tmpdir,
+                                                     data, monkeypatch):
+        self.prep(tmpdir, data)
+        # add in previous download (copy simple-2.0 as simple-1.0)
+        # so we can tell it didn't get overwritten
+        dest_file = os.path.join(self.download_dir, self.dist_file)
+        copy(self.dist_path2, dest_file)
+        dist_path2_md5 = hashlib.md5(
+            open(self.dist_path2, 'rb').read()).hexdigest()
+
+        unpack_file_url(self.dist_url, self.build_dir,
+                        download_dir=self.download_dir)
+        # our hash should be the same, i.e. not overwritten by simple-1.0 hash
+        assert dist_path2_md5 == hashlib.md5(
+            open(dest_file, 'rb').read()).hexdigest()
+
+    def test_unpack_file_url_download_bad_hash(self, tmpdir, data,
+                                               monkeypatch):
+        """
+        Test when existing download has different hash from the file url
+        fragment
+        """
+        self.prep(tmpdir, data)
+
+        # add in previous download (copy simple-2.0 as simple-1.0 so it has
+        # the wrong hash)
+        dest_file = os.path.join(self.download_dir, self.dist_file)
+        copy(self.dist_path2, dest_file)
+
+        dist_path_md5 = hashlib.md5(
+            open(self.dist_path, 'rb').read()).hexdigest()
+        dist_path2_md5 = hashlib.md5(open(dest_file, 'rb').read()).hexdigest()
+
+        assert dist_path_md5 != dist_path2_md5
+
+        self.dist_url.url = "%s#md5=%s" % (
+            self.dist_url.url,
+            dist_path_md5
+        )
+        unpack_file_url(self.dist_url, self.build_dir,
+                        download_dir=self.download_dir)
+
+        # confirm hash is for simple-1.0
+        # the previous bad download has been removed
+        assert (hashlib.md5(open(dest_file, 'rb').read()).hexdigest()
+                ==
+                dist_path_md5
+                ), hashlib.md5(open(dest_file, 'rb').read()).hexdigest()
+
+    def test_unpack_file_url_thats_a_dir(self, tmpdir, data):
+        self.prep(tmpdir, data)
+        dist_path = data.packages.join("FSPkg")
+        dist_url = Link(path_to_url(dist_path))
+        unpack_file_url(dist_url, self.build_dir,
+                        download_dir=self.download_dir)
+        assert os.path.isdir(os.path.join(self.build_dir, 'fspkg'))

From eb7a31e01961130a476abea4b564c51bd63c64ee Mon Sep 17 00:00:00 2001
From: Marcus Smith <qwcode@gmail.com>
Date: Sat, 1 Feb 2014 14:04:58 -0800
Subject: [PATCH 2/2] when file urls have hash fragments, check it

---
 pip/download.py             | 21 +++++++++++++--------
 tests/unit/test_download.py | 11 +++++++++++
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/pip/download.py b/pip/download.py
index a9169842acd..7458e48e9d3 100644
--- a/pip/download.py
+++ b/pip/download.py
@@ -625,7 +625,6 @@ def unpack_http_url(link, location, download_cache, download_dir=None,
 def unpack_file_url(link, location, download_dir=None):

     link_path = url_to_path(link.url_without_fragment)
-    from_path = None
     already_downloaded = False

     # If it's a url to a local directory
@@ -635,6 +634,11 @@ def unpack_file_url(link, location, download_dir=None):
         shutil.copytree(link_path, location, symlinks=True)
         return

+    # if link has a hash, let's confirm it matches
+    if link.hash:
+        link_path_hash = _get_hash_from_file(link_path, link)
+        _check_hash(link_path_hash, link)
+
     # If a download dir is specified, is the file already there and valid?
     if download_dir:
         download_path = os.path.join(download_dir, link.filename)
         if os.path.exists(download_path):
@@ -655,16 +659,17 @@ def unpack_file_url(link, location, download_dir=None):
         else:
             already_downloaded = True

-    # a download dir is specified and not already downloaded
-    if download_dir and not already_downloaded:
-        content_type = mimetypes.guess_type(link_path)[0]
-        _copy_file(link_path, download_dir, content_type, link)
-
-    # unpack the archive to the build dir location. Even when only downloading
-    # archives, they have to be unpacked to parse dependencies
     if already_downloaded:
         from_path = download_path
     else:
         from_path = link_path
+    content_type = mimetypes.guess_type(from_path)[0]
+
+    # unpack the archive to the build dir location. Even when only downloading
+    # archives, they have to be unpacked to parse dependencies
     unpack_file(from_path, location, content_type, link)
+
+    # a download dir is specified and not already downloaded
+    if download_dir and not already_downloaded:
+        _copy_file(from_path, download_dir, content_type, link)
diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py
index 5ee5b2953bf..e79eaecb21f 100644
--- a/tests/unit/test_download.py
+++ b/tests/unit/test_download.py
@@ -8,6 +8,7 @@

 import pip
 from pip.backwardcompat import urllib, BytesIO, b, pathname2url
+from pip.exceptions import HashMismatch
 from pip.download import (PipSession, path_to_url, unpack_http_url,
                           url_to_path, unpack_file_url)
 from pip.index import Link
@@ -229,6 +230,16 @@ def test_unpack_file_url_download_already_exists(self, tmpdir,
         assert dist_path2_md5 == hashlib.md5(
             open(dest_file, 'rb').read()).hexdigest()

+    def test_unpack_file_url_bad_hash(self, tmpdir, data,
+                                      monkeypatch):
+        """
+        Test when the file url hash fragment is wrong
+        """
+        self.prep(tmpdir, data)
+        self.dist_url.url = "%s#md5=bogus" % self.dist_url.url
+        with pytest.raises(HashMismatch):
+            unpack_file_url(self.dist_url, self.build_dir)
+
     def test_unpack_file_url_download_bad_hash(self, tmpdir, data,
                                                monkeypatch):
         """
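
Usage note: a minimal sketch of how the reworked unpack_file_url is meant to
be called once both patches are applied. It mirrors the unit tests above; the
paths here are hypothetical stand-ins for the tmpdir/data test fixtures and
must exist before the call.

    from pip.download import path_to_url, unpack_file_url
    from pip.index import Link

    # hypothetical local sdist; a wheel or any other archive behaves the same
    link = Link(path_to_url('/tmp/packages/simple-1.0.tar.gz'))

    # unpacks into the build dir (needed to parse dependencies) and, because
    # download_dir is given, also copies the archive there, re-using a
    # previously downloaded copy when its hash fragment checks out
    unpack_file_url(link, '/tmp/build', download_dir='/tmp/wheelhouse')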