From 78371cc95079e3bba43e88bcb67e0b9f8f883a50 Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Fri, 19 Oct 2018 10:06:10 +0100 Subject: [PATCH] Redact basic authentication passwords from log messages (#5773) Redact basic authentication passwords from URLs. --- news/4746.bugfix | 1 + src/pip/_internal/index.py | 6 ++--- src/pip/_internal/models/link.py | 7 ++--- src/pip/_internal/req/req_install.py | 6 ++--- src/pip/_internal/utils/misc.py | 39 +++++++++++++++++++++++----- src/pip/_internal/vcs/git.py | 7 +++-- tests/unit/test_index.py | 4 ++- tests/unit/test_utils.py | 35 +++++++++++++++++++++++-- 8 files changed, 84 insertions(+), 21 deletions(-) create mode 100644 news/4746.bugfix diff --git a/news/4746.bugfix b/news/4746.bugfix new file mode 100644 index 00000000000..3ad37679f89 --- /dev/null +++ b/news/4746.bugfix @@ -0,0 +1 @@ +Redact the password from the URL in various log messages. \ No newline at end of file diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 012e87a8205..876fff2213f 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -35,7 +35,7 @@ from pip._internal.utils.logging import indent_log from pip._internal.utils.misc import ( ARCHIVE_EXTENSIONS, SUPPORTED_EXTENSIONS, normalize_path, - remove_auth_from_url, + redact_password_from_url, ) from pip._internal.utils.packaging import check_requires_python from pip._internal.wheel import Wheel, wheel_ext @@ -326,7 +326,7 @@ def get_formatted_locations(self): if self.index_urls and self.index_urls != [PyPI.simple_url]: lines.append( "Looking in indexes: {}".format(", ".join( - remove_auth_from_url(url) for url in self.index_urls)) + redact_password_from_url(url) for url in self.index_urls)) ) if self.find_links: lines.append( @@ -923,7 +923,7 @@ def __init__(self, content, url, headers=None): self.headers = headers def __str__(self): - return self.url + return redact_password_from_url(self.url) def iter_links(self): """Yields all links in the page""" diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 5decb7cc476..a7b803f6ee4 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -4,7 +4,7 @@ from pip._vendor.six.moves.urllib import parse as urllib_parse from pip._internal.download import path_to_url -from pip._internal.utils.misc import splitext +from pip._internal.utils.misc import redact_password_from_url, splitext from pip._internal.utils.models import KeyBasedCompareMixin from pip._internal.wheel import wheel_ext @@ -44,9 +44,10 @@ def __str__(self): else: rp = '' if self.comes_from: - return '%s (from %s)%s' % (self.url, self.comes_from, rp) + return '%s (from %s)%s' % (redact_password_from_url(self.url), + self.comes_from, rp) else: - return str(self.url) + return redact_password_from_url(str(self.url)) def __repr__(self): return '' % self diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index c2624feed7c..aa4bb559bd9 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -30,7 +30,7 @@ from pip._internal.utils.misc import ( _make_build_dir, ask_path_exists, backup_dir, call_subprocess, display_path, dist_in_site_packages, dist_in_usersite, ensure_dir, - get_installed_version, rmtree, + get_installed_version, redact_password_from_url, rmtree, ) from pip._internal.utils.packaging import get_metadata from pip._internal.utils.setuptools_build import SETUPTOOLS_SHIM @@ -128,9 +128,9 @@ def __str__(self): if self.req: s = str(self.req) if self.link: - s += ' from %s' % self.link.url + s += ' from %s' % redact_password_from_url(self.link.url) elif self.link: - s = self.link.url + s = redact_password_from_url(self.link.url) else: s = '' if self.satisfied_by is not None: diff --git a/src/pip/_internal/utils/misc.py b/src/pip/_internal/utils/misc.py index 6998e16f06e..b4814abd0c2 100644 --- a/src/pip/_internal/utils/misc.py +++ b/src/pip/_internal/utils/misc.py @@ -890,15 +890,24 @@ def split_auth_from_netloc(netloc): return netloc, user_pass -def remove_auth_from_url(url): - # Return a copy of url with 'username:password@' removed. - # username/pass params are passed to subversion through flags - # and are not recognized in the url. +def redact_netloc(netloc): + """ + Replace the password in a netloc with "****", if it exists. + + For example, "user:pass@example.com" returns "user:****@example.com". + """ + netloc, (user, password) = split_auth_from_netloc(netloc) + if user is None: + return netloc + password = '' if password is None else ':****' + return '{user}{password}@{netloc}'.format(user=user, + password=password, + netloc=netloc) - # parsed url - purl = urllib_parse.urlsplit(url) - netloc, user_pass = split_auth_from_netloc(purl.netloc) +def _transform_url(url, transform_netloc): + purl = urllib_parse.urlsplit(url) + netloc = transform_netloc(purl.netloc) # stripped url url_pieces = ( purl.scheme, netloc, purl.path, purl.query, purl.fragment @@ -907,6 +916,22 @@ def remove_auth_from_url(url): return surl +def _get_netloc(netloc): + return split_auth_from_netloc(netloc)[0] + + +def remove_auth_from_url(url): + # Return a copy of url with 'username:password@' removed. + # username/pass params are passed to subversion through flags + # and are not recognized in the url. + return _transform_url(url, _get_netloc) + + +def redact_password_from_url(url): + """Replace the password in a given url with ****.""" + return _transform_url(url, redact_netloc) + + def protect_pip_from_modification_on_windows(modifying_pip): """Protection of pip.exe from modification on Windows diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 97785394631..89905eb3128 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -10,7 +10,9 @@ from pip._internal.exceptions import BadCommand from pip._internal.utils.compat import samefile -from pip._internal.utils.misc import display_path, make_vcs_requirement_url +from pip._internal.utils.misc import ( + display_path, make_vcs_requirement_url, redact_password_from_url, +) from pip._internal.utils.temp_dir import TempDirectory from pip._internal.vcs import VersionControl, vcs @@ -193,7 +195,8 @@ def is_commit_id_equal(self, dest, name): def fetch_new(self, dest, url, rev_options): rev_display = rev_options.to_display() logger.info( - 'Cloning %s%s to %s', url, rev_display, display_path(dest), + 'Cloning %s%s to %s', redact_password_from_url(url), + rev_display, display_path(dest), ) self.run_command(['clone', '-q', url, dest]) diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index ce81a9deb02..eaac77e070f 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -161,7 +161,9 @@ def test_get_formatted_locations_basic_auth(): finder = PackageFinder([], index_urls, session=[]) result = finder.get_formatted_locations() - assert 'user' not in result and 'pass' not in result + assert 'user' in result + assert '****' in result + assert 'pass' not in result @pytest.mark.parametrize( diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 591c8ab54c0..7f98fd6309a 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -24,8 +24,9 @@ from pip._internal.utils.hashes import Hashes, MissingHashes from pip._internal.utils.misc import ( call_subprocess, egg_link_path, ensure_dir, get_installed_distributions, - get_prog, make_vcs_requirement_url, normalize_path, remove_auth_from_url, - rmtree, split_auth_from_netloc, untar_file, unzip_file, + get_prog, make_vcs_requirement_url, normalize_path, redact_netloc, + redact_password_from_url, remove_auth_from_url, rmtree, + split_auth_from_netloc, untar_file, unzip_file, ) from pip._internal.utils.packaging import check_dist_requires_python from pip._internal.utils.temp_dir import TempDirectory @@ -662,6 +663,25 @@ def test_split_auth_from_netloc(netloc, expected): assert actual == expected +@pytest.mark.parametrize('netloc, expected', [ + # Test a basic case. + ('example.com', 'example.com'), + # Test with username and no password. + ('user@example.com', 'user@example.com'), + # Test with username and password. + ('user:pass@example.com', 'user:****@example.com'), + # Test with username and empty password. + ('user:@example.com', 'user:****@example.com'), + # Test the password containing an @ symbol. + ('user:pass@word@example.com', 'user:****@example.com'), + # Test the password containing a : symbol. + ('user:pass:word@example.com', 'user:****@example.com'), +]) +def test_redact_netloc(netloc, expected): + actual = redact_netloc(netloc) + assert actual == expected + + @pytest.mark.parametrize('auth_url, expected_url', [ ('https://user:pass@domain.tld/project/tags/v0.2', 'https://domain.tld/project/tags/v0.2'), @@ -681,3 +701,14 @@ def test_split_auth_from_netloc(netloc, expected): def test_remove_auth_from_url(auth_url, expected_url): url = remove_auth_from_url(auth_url) assert url == expected_url + + +@pytest.mark.parametrize('auth_url, expected_url', [ + ('https://user@example.com/abc', 'https://user@example.com/abc'), + ('https://user:password@example.com', 'https://user:****@example.com'), + ('https://user:@example.com', 'https://user:****@example.com'), + ('https://example.com', 'https://example.com') +]) +def test_redact_password_from_url(auth_url, expected_url): + url = redact_password_from_url(auth_url) + assert url == expected_url