From 1e9b865e1cfdb5908b37d11b4d8c1d8a505623f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Gia=20Phong?= Date: Thu, 20 Aug 2020 11:53:19 +0700 Subject: [PATCH] Finalize parallel download UI Unfortunately HEAD responses do not carry caching information, so all files will be assumed to be downloaded from the index. Hence, the speed report might be inaccurate if there are cached files. --- news/8771.feature | 1 + src/pip/_internal/network/download.py | 33 ++++++++------------------- 2 files changed, 11 insertions(+), 23 deletions(-) create mode 100644 news/8771.feature diff --git a/news/8771.feature b/news/8771.feature new file mode 100644 index 00000000000..c28d8075ec1 --- /dev/null +++ b/news/8771.feature @@ -0,0 +1 @@ +Download distributions in parallel when fast-deps feature is enabled. diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index 59f9e584ba8..14ca4e1faa5 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -204,15 +204,6 @@ def chunks(self): ) -def _write_from_cache(file): - # type: (_FileToDownload) -> Tuple[str, Tuple[str, str]] - assert file.is_cached - with open(file.path, 'wb') as content_file: - for chunk in file.chunks: - content_file.write(chunk) - return file.url, (file.path, file.type) - - class BatchDownloader(object): def __init__( @@ -230,8 +221,9 @@ def __init__( self._lock = Lock() def _files_to_download(self, links, location): - # type: (Iterable[Link], str) -> Iterable[_FileToDownload] - return map(partial(_FileToDownload, location, self._session), links) + # type: (Iterable[Link], str) -> List[_FileToDownload] + file_to_download = partial(_FileToDownload, location, self._session) + return list(map(file_to_download, links)) @property def _is_downloading(self): @@ -261,13 +253,16 @@ def _progress(self, files): Its __enter__() returns a routine to update such progress, which is to be used as the first argument of self._download_one. 
""" - total_size = sum(file.size for file in files if file.size is not None) - if _should_hide_progress(total_size): + # Is invalid/lacking Content-Length common enough + # to be handled specially? + size = sum(file.size for file in files if file.size is not None) + logger.info('Downloading %d files (%s)', len(files), format_size(size)) + if _should_hide_progress(size): yield lambda chunk: None else: assert not self._is_downloading self._count = Semaphore(len(files)) - progress = DownloadProgressProvider(self._progress_bar, total_size) + progress = DownloadProgressProvider(self._progress_bar, size) self._chunks = progress(self._iter_chunks()) yield self._update_progress @@ -293,15 +288,7 @@ def _download_one( def __call__(self, links, location): # type: (Iterable[Link], str) -> Iterable[Tuple[str, Tuple[str, str]]] """Download the files given by links into location.""" - files = [] # type: List[_FileToDownload] - cached_files = [] # type: List[_FileToDownload] - for file in self._files_to_download(links, location): - (cached_files if file.is_cached else files).append(file) - - # Cached files need no progress bar. - for result in map_multithread(_write_from_cache, cached_files): - yield result - + files = self._files_to_download(links, location) with self._progress(files) as update: download_one = partial(self._download_one, update) for result in map_multithread(download_one, files):