Skip to content

Commit 387ff96

Browse files
[3.12] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563) (GH-119024)
(cherry picked from commit e237b25) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 5bf7f5c commit 387ff96

File tree

3 files changed

+70
-4
lines changed

3 files changed

+70
-4
lines changed

Lib/test/test_urlparse.py

+67-3
Original file line number | Diff line number | Diff line change
@@ -103,15 +103,17 @@
103103

104104
class UrlParseTestCase(unittest.TestCase):
105105

106-
def checkRoundtrips(self, url, parsed, split):
106+
def checkRoundtrips(self, url, parsed, split, url2=None):
107+
if url2 is None:
108+
url2 = url
107109
result = urllib.parse.urlparse(url)
108110
self.assertSequenceEqual(result, parsed)
109111
t = (result.scheme, result.netloc, result.path,
110112
result.params, result.query, result.fragment)
111113
self.assertSequenceEqual(t, parsed)
112114
# put it back together and it should be the same
113115
result2 = urllib.parse.urlunparse(result)
114-
self.assertSequenceEqual(result2, url)
116+
self.assertSequenceEqual(result2, url2)
115117
self.assertSequenceEqual(result2, result.geturl())
116118

117119
# the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
137139
result.query, result.fragment)
138140
self.assertSequenceEqual(t, split)
139141
result2 = urllib.parse.urlunsplit(result)
140-
self.assertSequenceEqual(result2, url)
142+
self.assertSequenceEqual(result2, url2)
141143
self.assertSequenceEqual(result2, result.geturl())
142144

143145
# check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ def test_qs(self):
175177

176178
def test_roundtrips(self):
177179
str_cases = [
180+
('path/to/file',
181+
('', '', 'path/to/file', '', '', ''),
182+
('', '', 'path/to/file', '', '')),
183+
('/path/to/file',
184+
('', '', '/path/to/file', '', '', ''),
185+
('', '', '/path/to/file', '', '')),
186+
('//path/to/file',
187+
('', 'path', '/to/file', '', '', ''),
188+
('', 'path', '/to/file', '', '')),
189+
('////path/to/file',
190+
('', '', '//path/to/file', '', '', ''),
191+
('', '', '//path/to/file', '', '')),
192+
('scheme:path/to/file',
193+
('scheme', '', 'path/to/file', '', '', ''),
194+
('scheme', '', 'path/to/file', '', '')),
195+
('scheme:/path/to/file',
196+
('scheme', '', '/path/to/file', '', '', ''),
197+
('scheme', '', '/path/to/file', '', '')),
198+
('scheme://path/to/file',
199+
('scheme', 'path', '/to/file', '', '', ''),
200+
('scheme', 'path', '/to/file', '', '')),
201+
('scheme:////path/to/file',
202+
('scheme', '', '//path/to/file', '', '', ''),
203+
('scheme', '', '//path/to/file', '', '')),
178204
('file:///tmp/junk.txt',
179205
('file', '', '/tmp/junk.txt', '', '', ''),
180206
('file', '', '/tmp/junk.txt', '', '')),
207+
('file:////tmp/junk.txt',
208+
('file', '', '//tmp/junk.txt', '', '', ''),
209+
('file', '', '//tmp/junk.txt', '', '')),
210+
('file://///tmp/junk.txt',
211+
('file', '', '///tmp/junk.txt', '', '', ''),
212+
('file', '', '///tmp/junk.txt', '', '')),
181213
('imap://mail.python.org/mbox1',
182214
('imap', 'mail.python.org', '/mbox1', '', '', ''),
183215
('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -213,6 +245,38 @@ def _encode(t):
213245
for url, parsed, split in str_cases + bytes_cases:
214246
self.checkRoundtrips(url, parsed, split)
215247

248+
def test_roundtrips_normalization(self):
249+
str_cases = [
250+
('///path/to/file',
251+
'/path/to/file',
252+
('', '', '/path/to/file', '', '', ''),
253+
('', '', '/path/to/file', '', '')),
254+
('scheme:///path/to/file',
255+
'scheme:/path/to/file',
256+
('scheme', '', '/path/to/file', '', '', ''),
257+
('scheme', '', '/path/to/file', '', '')),
258+
('file:/tmp/junk.txt',
259+
'file:///tmp/junk.txt',
260+
('file', '', '/tmp/junk.txt', '', '', ''),
261+
('file', '', '/tmp/junk.txt', '', '')),
262+
('http:/tmp/junk.txt',
263+
'http:///tmp/junk.txt',
264+
('http', '', '/tmp/junk.txt', '', '', ''),
265+
('http', '', '/tmp/junk.txt', '', '')),
266+
('https:/tmp/junk.txt',
267+
'https:///tmp/junk.txt',
268+
('https', '', '/tmp/junk.txt', '', '', ''),
269+
('https', '', '/tmp/junk.txt', '', '')),
270+
]
271+
def _encode(t):
272+
return (t[0].encode('ascii'),
273+
t[1].encode('ascii'),
274+
tuple(x.encode('ascii') for x in t[2]),
275+
tuple(x.encode('ascii') for x in t[3]))
276+
bytes_cases = [_encode(x) for x in str_cases]
277+
for url, url2, parsed, split in str_cases + bytes_cases:
278+
self.checkRoundtrips(url, parsed, split, url2)
279+
216280
def test_http_roundtrips(self):
217281
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
218282
# so we test both 'http:' and 'https:' in all the following.

Lib/urllib/parse.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -525,7 +525,7 @@ def urlunsplit(components):
525525
empty query; the RFC states that these are equivalent)."""
526526
scheme, netloc, url, query, fragment, _coerce_result = (
527527
_coerce_args(*components))
528-
if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
528+
if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
529529
if url and url[:1] != '/': url = '/' + url
530530
url = '//' + (netloc or '') + url
531531
if scheme:
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs that have no authority and whose path starts with multiple slashes.
2+
Based on patch by Ashwin Ramaswami.

0 commit comments

Comments
 (0)