Skip to content

Commit 5a88d50

Browse files
timgraham authored and orsenthil committed
bpo-27657: Fix urlparse() with numeric paths (#661)
* bpo-27657: Fix urlparse() with numeric paths

  Revert parsing decision from bpo-754016 in favor of the documented consensus in bpo-16932 of how to treat strings without a // to designate the netloc.

* bpo-22891: Remove urlsplit() optimization for 'http' prefixed inputs.
1 parent fbe3c76 commit 5a88d50

File tree

3 files changed

+9
-25
lines changed

3 files changed

+9
-25
lines changed

Diff for: Lib/test/test_urlparse.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -709,15 +709,17 @@ def test_withoutscheme(self):
709709

710710
def test_portseparator(self):
711711
# Issue 754016 makes changes for port separator ':' from scheme separator
712-
self.assertEqual(urllib.parse.urlparse("path:80"),
713-
('','','path:80','','',''))
712+
self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
713+
self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
714+
self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
714715
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
715716
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
716717
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
717718
('http','www.python.org:80','','','',''))
718719
# As usual, need to check bytes input as well
719-
self.assertEqual(urllib.parse.urlparse(b"path:80"),
720-
(b'',b'',b'path:80',b'',b'',b''))
720+
self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
721+
self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
722+
self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
721723
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
722724
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
723725
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

Diff for: Lib/urllib/parse.py

+1-21
Original file line numberDiff line numberDiff line change
@@ -431,31 +431,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
431431
netloc = query = fragment = ''
432432
i = url.find(':')
433433
if i > 0:
434-
if url[:i] == 'http': # optimize the common case
435-
url = url[i+1:]
436-
if url[:2] == '//':
437-
netloc, url = _splitnetloc(url, 2)
438-
if (('[' in netloc and ']' not in netloc) or
439-
(']' in netloc and '[' not in netloc)):
440-
raise ValueError("Invalid IPv6 URL")
441-
if allow_fragments and '#' in url:
442-
url, fragment = url.split('#', 1)
443-
if '?' in url:
444-
url, query = url.split('?', 1)
445-
_checknetloc(netloc)
446-
v = SplitResult('http', netloc, url, query, fragment)
447-
_parse_cache[key] = v
448-
return _coerce_result(v)
449434
for c in url[:i]:
450435
if c not in scheme_chars:
451436
break
452437
else:
453-
# make sure "url" is not actually a port number (in which case
454-
# "scheme" is really part of the path)
455-
rest = url[i+1:]
456-
if not rest or any(c not in '0123456789' for c in rest):
457-
# not a port number
458-
scheme, url = url[:i].lower(), rest
438+
scheme, url = url[:i].lower(), url[i+1:]
459439

460440
if url[:2] == '//':
461441
netloc, url = _splitnetloc(url, 2)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix urllib.parse.urlparse() with numeric paths. A string like "path:80" is
2+
no longer parsed as a path but as a scheme ("path") and a path ("80").

0 commit comments

Comments
 (0)