From 957c093a106995ec5b903b19bc314fd376b783f7 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 14:07:15 +0300 Subject: [PATCH 01/17] Speed up pip list --outdated --- src/pip/_internal/commands/list.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index cce470a6051..fc09bde9ee2 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -5,9 +5,11 @@ import json import logging +import concurrent.futures from pip._vendor import six from pip._vendor.six.moves import zip_longest +from pip._vendor.requests.adapters import DEFAULT_POOLSIZE from pip._internal.cli import cmdoptions from pip._internal.cli.req_command import IndexGroupCommand @@ -182,10 +184,10 @@ def get_not_required(self, packages, options): def iter_packages_latest_infos(self, packages, options): with self._build_session(options) as session: finder = self._build_package_finder(options, session) - - for dist in packages: + with concurrent.futures.ThreadPoolExecutor(max_workers=DEFAULT_POOLSIZE) as executor: + all_candidates_list = executor.map(finder.find_all_candidates, [dist.key for dist in packages]) + for dist, all_candidates in zip(packages, all_candidates_list): typ = 'unknown' - all_candidates = finder.find_all_candidates(dist.key) if not options.pre: # Remove prereleases all_candidates = [candidate for candidate in all_candidates From 3151566154a493fcd417dc7b0639b7efdb38bb83 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 14:14:09 +0300 Subject: [PATCH 02/17] Add news --- news/7962.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/7962.bugfix diff --git a/news/7962.bugfix b/news/7962.bugfix new file mode 100644 index 00000000000..b1437031df7 --- /dev/null +++ b/news/7962.bugfix @@ -0,0 +1 @@ +`pip list --outdated` version fetching is multi-threaded From f8e2b5d5a0b234ea664a66b06c52ad303dc03dab Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 14:19:34 +0300 Subject: [PATCH 03/17] Make indent_log thread-safe --- src/pip/_internal/utils/logging.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pip/_internal/utils/logging.py b/src/pip/_internal/utils/logging.py index 134f7908d9d..0143434ee2f 100644 --- a/src/pip/_internal/utils/logging.py +++ b/src/pip/_internal/utils/logging.py @@ -104,6 +104,8 @@ def indent_log(num=2): A context manager which will cause the log output to be indented for any log messages emitted inside it. """ + # For thread-safety + _log_state.indentation = get_indentation() _log_state.indentation += num try: yield From 829a3b20c4620cc4a06bd937315eba96f1c86628 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 14:25:02 +0300 Subject: [PATCH 04/17] Fix Python 2 compatibility --- src/pip/_internal/commands/list.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index fc09bde9ee2..7a9f3ff9347 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -5,11 +5,11 @@ import json import logging -import concurrent.futures +from multiprocessing.dummy import Pool from pip._vendor import six -from pip._vendor.six.moves import zip_longest from pip._vendor.requests.adapters import DEFAULT_POOLSIZE +from pip._vendor.six.moves import zip_longest from pip._internal.cli import cmdoptions from pip._internal.cli.req_command import IndexGroupCommand @@ -184,8 +184,15 @@ def get_not_required(self, packages, options): def iter_packages_latest_infos(self, packages, options): with self._build_session(options) as session: finder = self._build_package_finder(options, session) - with concurrent.futures.ThreadPoolExecutor(max_workers=DEFAULT_POOLSIZE) as executor: - all_candidates_list = executor.map(finder.find_all_candidates, [dist.key for dist in packages]) + + # Doing multithreading in Python 2 compatible way + executor = Pool(DEFAULT_POOLSIZE) + all_candidates_list = executor.map( + finder.find_all_candidates, + [dist.key for dist in packages] + ) + executor.terminate() + for dist, all_candidates in zip(packages, all_candidates_list): typ = 'unknown' if not options.pre: From 1d4fc03fe299102f7cf4db8e136093b11a63ab64 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 16:20:09 +0300 Subject: [PATCH 05/17] Change Pool to ThreadPool and fix minor closing --- src/pip/_internal/commands/list.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 7a9f3ff9347..0f9d84cc2a7 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -5,7 +5,7 @@ import json import logging -from multiprocessing.dummy import Pool +from multiprocessing.pool import ThreadPool from pip._vendor import six from pip._vendor.requests.adapters import DEFAULT_POOLSIZE @@ -186,12 +186,13 @@ def iter_packages_latest_infos(self, packages, options): finder = self._build_package_finder(options, session) # Doing multithreading in Python 2 compatible way - executor = Pool(DEFAULT_POOLSIZE) + executor = ThreadPool(DEFAULT_POOLSIZE) all_candidates_list = executor.map( finder.find_all_candidates, [dist.key for dist in packages] ) - executor.terminate() + executor.close() + executor.join() for dist, all_candidates in zip(packages, all_candidates_list): typ = 'unknown' From ccb0b5cc0814e21653cde30675f6aece992f1e72 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 16:38:54 +0300 Subject: [PATCH 06/17] Lazier calculations --- src/pip/_internal/commands/list.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 0f9d84cc2a7..1fd45ee77a4 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -185,17 +185,9 @@ def iter_packages_latest_infos(self, packages, options): with self._build_session(options) as session: finder = self._build_package_finder(options, session) - # Doing multithreading in Python 2 compatible way - executor = ThreadPool(DEFAULT_POOLSIZE) - all_candidates_list = executor.map( - finder.find_all_candidates, - [dist.key for dist in packages] - ) - executor.close() - executor.join() - - for dist, all_candidates in zip(packages, all_candidates_list): + def latest_infos(dist): typ = 'unknown' + all_candidates = finder.find_all_candidates(dist.key) if not options.pre: # Remove prereleases all_candidates = [candidate for candidate in all_candidates @@ -206,7 +198,7 @@ def iter_packages_latest_infos(self, packages, options): ) best_candidate = evaluator.sort_best_candidate(all_candidates) if best_candidate is None: - continue + return None remote_version = best_candidate.version if best_candidate.link.is_wheel: @@ -216,7 +208,16 @@ def iter_packages_latest_infos(self, packages, options): # This is dirty but makes the rest of the code much cleaner dist.latest_version = remote_version dist.latest_filetype = typ - yield dist + return dist + + pool = ThreadPool(DEFAULT_POOLSIZE) + + for dist in pool.imap_unordered(latest_infos, packages): + if dist is not None: + yield dist + + pool.close() + pool.join() def output_package_listing(self, packages, options): packages = sorted( From 03bfdbcc58582b0dbf23606179ff54765add8885 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 17:15:47 +0300 Subject: [PATCH 07/17] Refactoring. Rename inner function --- src/pip/_internal/commands/list.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 1fd45ee77a4..974b6810116 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -185,7 +185,7 @@ def iter_packages_latest_infos(self, packages, options): with self._build_session(options) as session: finder = self._build_package_finder(options, session) - def latest_infos(dist): + def latest_info(dist): typ = 'unknown' all_candidates = finder.find_all_candidates(dist.key) if not options.pre: @@ -212,7 +212,7 @@ def latest_infos(dist): pool = ThreadPool(DEFAULT_POOLSIZE) - for dist in pool.imap_unordered(latest_infos, packages): + for dist in pool.imap_unordered(latest_info, packages): if dist is not None: yield dist From f695354bf8103ea18774e45626c71115213078bc Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 21:55:34 +0300 Subject: [PATCH 08/17] Add commentary --- src/pip/_internal/commands/list.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 974b6810116..db5e7979391 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -210,6 +210,10 @@ def latest_info(dist): dist.latest_filetype = typ return dist + # This is done to multithread requests to pypi.org and eventually + # get performance boost so that "real time" of this function is + # almost equal to "user time". Also this gives performance + # boost up to 2x pool = ThreadPool(DEFAULT_POOLSIZE) for dist in pool.imap_unordered(latest_info, packages): From 6b8c191ee2953b23178854457cdc84c0e2ed8ed4 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 21:57:10 +0300 Subject: [PATCH 09/17] Update comment --- src/pip/_internal/commands/list.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index db5e7979391..9526b49e2a7 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -211,9 +211,8 @@ def latest_info(dist): return dist # This is done to multithread requests to pypi.org and eventually - # get performance boost so that "real time" of this function is - # almost equal to "user time". Also this gives performance - # boost up to 2x + # get performance boost up to 2x so that "real time" of this + # function is almost equal to "user time" pool = ThreadPool(DEFAULT_POOLSIZE) for dist in pool.imap_unordered(latest_info, packages): From 9b9e137d1dcceeb6b56fc8623d11bf159c38fe47 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 2 Apr 2020 22:01:22 +0300 Subject: [PATCH 10/17] Update comment --- src/pip/_internal/commands/list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 9526b49e2a7..513b516615b 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -210,9 +210,9 @@ def latest_info(dist): dist.latest_filetype = typ return dist - # This is done to multithread requests to pypi.org and eventually - # get performance boost up to 2x so that "real time" of this - # function is almost equal to "user time" + # This is done for 2x speed up of requests to pypi.org + # so that "real time" of this function + # is almost equal to "user time" pool = ThreadPool(DEFAULT_POOLSIZE) for dist in pool.imap_unordered(latest_info, packages): From 5cccd4ef6b7150f653edc9f4409c90cecd2d13ed Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Fri, 3 Apr 2020 09:07:09 +0300 Subject: [PATCH 11/17] Change ThreadPool to Pool --- src/pip/_internal/commands/list.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 513b516615b..2d909ff11e0 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -5,7 +5,7 @@ import json import logging -from multiprocessing.pool import ThreadPool +from multiprocessing.dummy import Pool from pip._vendor import six from pip._vendor.requests.adapters import DEFAULT_POOLSIZE @@ -213,7 +213,7 @@ def latest_info(dist): # This is done for 2x speed up of requests to pypi.org # so that "real time" of this function # is almost equal to "user time" - pool = ThreadPool(DEFAULT_POOLSIZE) + pool = Pool(DEFAULT_POOLSIZE) for dist in pool.imap_unordered(latest_info, packages): if dist is not None: From 3bc3cee5aeb7d5bbf9db13091884ca92cc259abd Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Sat, 4 Apr 2020 22:18:12 +0300 Subject: [PATCH 12/17] Add logging thread-safety tests --- tests/unit/test_logging.py | 43 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/unit/test_logging.py b/tests/unit/test_logging.py index a2bab3ea9c5..7a5309bb99b 100644 --- a/tests/unit/test_logging.py +++ b/tests/unit/test_logging.py @@ -2,6 +2,7 @@ import logging import os import time +from threading import Thread import pytest from mock import patch @@ -11,6 +12,7 @@ BrokenStdoutLoggingError, ColorizedStreamHandler, IndentingFormatter, + indent_log ) from pip._internal.utils.misc import captured_stderr, captured_stdout @@ -108,6 +110,47 @@ def test_format_deprecated(self, level_name, expected): f = IndentingFormatter(fmt="%(message)s") assert f.format(record) == expected + def test_thread_safety_base(self): + actual = "Initial content" + + record = self.make_record( + 'DEPRECATION: hello\nworld', level_name='WARNING', + ) + f = IndentingFormatter(fmt="%(message)s") + + def thread_function(): + nonlocal actual, f + actual = f.format(record) + + thread_function() + expected = actual + actual = "Another initial content" + thread = Thread(target=thread_function) + thread.start() + thread.join() + assert actual == expected + + def test_thread_safety_indent_log(self): + actual = "Initial content" + + record = self.make_record( + 'DEPRECATION: hello\nworld', level_name='WARNING', + ) + f = IndentingFormatter(fmt="%(message)s") + + def thread_function(): + nonlocal actual, f + with indent_log(): + actual = f.format(record) + + thread_function() + expected = actual + actual = "Another initial content" + thread = Thread(target=thread_function) + thread.start() + thread.join() + assert actual == expected + class TestColorizedStreamHandler(object): From f8bb362fef1b5e77d4f1f40cca003a219820120d Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Sat, 4 Apr 2020 22:23:00 +0300 Subject: [PATCH 13/17] Fix Python 2 compatibility in tests --- tests/unit/test_logging.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_logging.py b/tests/unit/test_logging.py index 7a5309bb99b..7592fcf31ef 100644 --- a/tests/unit/test_logging.py +++ b/tests/unit/test_logging.py @@ -111,45 +111,37 @@ def test_format_deprecated(self, level_name, expected): assert f.format(record) == expected def test_thread_safety_base(self): - actual = "Initial content" - record = self.make_record( 'DEPRECATION: hello\nworld', level_name='WARNING', ) f = IndentingFormatter(fmt="%(message)s") + results = [] def thread_function(): - nonlocal actual, f - actual = f.format(record) + results.append(f.format(record)) thread_function() - expected = actual - actual = "Another initial content" thread = Thread(target=thread_function) thread.start() thread.join() - assert actual == expected + assert results[0] == results[1] def test_thread_safety_indent_log(self): - actual = "Initial content" - record = self.make_record( 'DEPRECATION: hello\nworld', level_name='WARNING', ) f = IndentingFormatter(fmt="%(message)s") + results = [] def thread_function(): - nonlocal actual, f with indent_log(): - actual = f.format(record) + results.append(f.format(record)) thread_function() - expected = actual - actual = "Another initial content" thread = Thread(target=thread_function) thread.start() thread.join() - assert actual == expected + assert results[0] == results[1] class TestColorizedStreamHandler(object): From e454181ba65f706f90c79f774d5f7680a0c9d559 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Sat, 4 Apr 2020 22:30:57 +0300 Subject: [PATCH 14/17] Fix coding style --- tests/unit/test_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_logging.py b/tests/unit/test_logging.py index 7592fcf31ef..a62c18c770f 100644 --- a/tests/unit/test_logging.py +++ b/tests/unit/test_logging.py @@ -12,7 +12,7 @@ BrokenStdoutLoggingError, ColorizedStreamHandler, IndentingFormatter, - indent_log + indent_log, ) from pip._internal.utils.misc import captured_stderr, captured_stdout From 917ad6aa0e78b79073948ba1ff583f90d179af76 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 9 Apr 2020 17:19:54 +0300 Subject: [PATCH 15/17] Update news/7962.bugfix Co-Authored-By: Pradyun Gedam --- news/7962.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/7962.bugfix b/news/7962.bugfix index b1437031df7..4b8b0068096 100644 --- a/news/7962.bugfix +++ b/news/7962.bugfix @@ -1 +1 @@ -`pip list --outdated` version fetching is multi-threaded +Significantly speedup `pip list --outdated` through parallelizing index interaction. From b744854f442b0f56c802c793cf013d9773e1d620 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Thu, 9 Apr 2020 17:57:10 +0300 Subject: [PATCH 16/17] Update news/7962.bugfix. Fix linter Co-Authored-By: Pradyun Gedam --- news/7962.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/7962.bugfix b/news/7962.bugfix index 4b8b0068096..76c3442d053 100644 --- a/news/7962.bugfix +++ b/news/7962.bugfix @@ -1 +1 @@ -Significantly speedup `pip list --outdated` through parallelizing index interaction. +Significantly speedup ``pip list --outdated`` through parallelizing index interaction. From f0ae64f959f871c79cc7d6adf3cfb010d5649f96 Mon Sep 17 00:00:00 2001 From: Nikolay Korolev Date: Sun, 12 Apr 2020 19:01:51 +0300 Subject: [PATCH 17/17] Delete misleading line of code --- src/pip/_internal/utils/logging.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pip/_internal/utils/logging.py b/src/pip/_internal/utils/logging.py index 0143434ee2f..9a017cf7e33 100644 --- a/src/pip/_internal/utils/logging.py +++ b/src/pip/_internal/utils/logging.py @@ -52,7 +52,6 @@ _log_state = threading.local() -_log_state.indentation = 0 subprocess_logger = getLogger('pip.subprocessor')