diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 80fb9e5cd2a445..f1f46be0cab6d2 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -686,6 +686,13 @@ def test_attributes_bad_scheme(self): else: self.assertEqual(p.scheme, "") + def test_attributes_bad_scheme_CVE_2023_24329(self): + """Check handling of invalid schemes that start with blank characters.""" + for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): + url = " https://www.example.net" + p = parse(url) + self.assertEqual(p.scheme, "https") + def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it # should allow the username, hostname, and port to be filled diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 5f95c5ff7f9c1c..3dc85ff6ae9a07 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -451,6 +451,7 @@ def urlsplit(url, scheme='', allow_fragments=True): Note that % escapes are not expanded. """ + url = url.lstrip() url, scheme, _coerce_result = _coerce_args(url, scheme) for b in _UNSAFE_URL_BYTES_TO_REMOVE: diff --git a/Misc/NEWS.d/next/Security/2023-03-06-22-48-08.gh-issue-102153.eiaVrE.rst b/Misc/NEWS.d/next/Security/2023-03-06-22-48-08.gh-issue-102153.eiaVrE.rst new file mode 100644 index 00000000000000..9904dfe666956d --- /dev/null +++ b/Misc/NEWS.d/next/Security/2023-03-06-22-48-08.gh-issue-102153.eiaVrE.rst @@ -0,0 +1,3 @@ +Fix the parsing problem (CVE-2023-24329) in urlparse when the entire URL +starts with blank characters. This vulnerability could allow an attacker to bypass +the protections set by the developer for scheme and host.