From f0812d784836d18fd25ea32f9b5a0c9c6e92425b Mon Sep 17 00:00:00 2001 From: Hubert Hirtz Date: Mon, 4 Mar 2024 01:27:55 +0000 Subject: [PATCH] [utils] Handle user:pass in URLs (#28801) * Handle user:pass in URLs Fixes "nonnumeric port" errors when youtube-dl is given URLs with usernames and passwords such as: http://username:password@example.com/myvideo.mp4 Refs: - https://en.wikipedia.org/wiki/Basic_access_authentication - https://tools.ietf.org/html/rfc1738#section-3.1 - https://docs.python.org/3.8/library/urllib.parse.html#urllib.parse.urlsplit Fixes #18276 (point 4) Fixes #20258 Fixes #26211 (see comment) * Align code with yt-dlp --------- Co-authored-by: dirkf --- test/test_utils.py | 13 +++++++++++++ youtube_dl/utils.py | 22 +++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 102420fcb88..90d64b5811e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -81,6 +81,7 @@ sanitize_filename, sanitize_path, sanitize_url, + sanitized_Request, shell_quote, smuggle_url, str_or_none, @@ -255,6 +256,18 @@ def test_sanitize_url(self): self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('foo bar'), 'foo bar') + def test_sanitized_Request(self): + self.assertFalse(sanitized_Request('http://foo.bar').has_header('Authorization')) + self.assertFalse(sanitized_Request('http://:foo.bar').has_header('Authorization')) + self.assertEqual(sanitized_Request('http://@foo.bar').get_header('Authorization'), + 'Basic Og==') + self.assertEqual(sanitized_Request('http://:pass@foo.bar').get_header('Authorization'), + 'Basic OnBhc3M=') + self.assertEqual(sanitized_Request('http://user:@foo.bar').get_header('Authorization'), + 'Basic dXNlcjo=') + self.assertEqual(sanitized_Request('http://user:pass@foo.bar').get_header('Authorization'), + 'Basic dXNlcjpwYXNz') + def test_expand_path(self): def env(var): return '%{0}%'.format(var) if sys.platform == 'win32' else 
def extract_basic_auth(url):
    """Split userinfo credentials out of a URL.

    Returns a 2-tuple (url, auth_header):
    - if the URL carries no userinfo part, the URL is returned unchanged
      and auth_header is None;
    - otherwise the URL is returned with the userinfo removed and
      auth_header is the corresponding 'Basic ...' Authorization header
      value (a missing password is treated as an empty one).
    """
    parts = compat_urllib_parse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Strip only the userinfo prefix from the netloc. Rebuilding the netloc
    # from parts.hostname would drop the brackets around an IPv6 literal
    # ('[::1]:8080' would become '::1:8080') and produce an invalid URL.
    url = compat_urllib_parse.urlunsplit(
        parts._replace(netloc=parts.netloc.rpartition('@')[2]))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic {0}'.format(auth_payload.decode('ascii'))


def sanitized_Request(url, *args, **kwargs):
    """Build a Request for url after sanitizing and escaping it.

    Credentials embedded in the URL (user:pass@host) are removed from the
    URL and sent as an 'Authorization' header instead. Remaining positional
    and keyword arguments are passed through to the Request constructor;
    the second extra positional argument, or the 'headers' keyword, is
    treated as the headers mapping.
    """
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers_in_args = len(args) > 1
        headers = args[1] if headers_in_args else kwargs.get('headers')
        # Work on a copy: the caller's mapping may be shared between
        # requests and must not start carrying these credentials.
        headers = dict(headers or {})
        headers['Authorization'] = auth_header
        # Always hand the updated copy to Request, including when the
        # caller supplied an empty mapping.
        if headers_in_args:
            args = args[:1] + (headers,) + args[2:]
        else:
            kwargs['headers'] = headers
            kwargs = compat_kwargs(kwargs)
    return compat_urllib_request.Request(url, *args, **kwargs)