Skip to content

Commit d783d7b

Browse files
authored Mar 18, 2025
GH-126367: url2pathname(): handle NTFS alternate data streams (#131428)
Adjust `url2pathname()` to decode embedded colon characters in Windows URIs, rather than bailing out with an `OSError`.
1 parent 01b5abb commit d783d7b

File tree

4 files changed

+20
-17
lines changed

4 files changed

+20
-17
lines changed
 

Diff for: ‎Doc/library/urllib.request.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions:
182182
'C:\\Program Files'
183183

184184
.. versionchanged:: 3.14
185-
Windows drive letters are no longer converted to uppercase.
185+
Windows drive letters are no longer converted to uppercase, and ``:``
186+
characters not following a drive letter no longer cause an
187+
:exc:`OSError` exception to be raised on Windows.
186188

187189

188190
.. function:: getproxies()

Diff for: ‎Lib/nturl2path.py

+9-14
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def url2pathname(url):
1414
# ///C:/foo/bar/spam.foo
1515
# become
1616
# C:\foo\bar\spam.foo
17-
import string, urllib.parse
17+
import urllib.parse
1818
if url[:3] == '///':
1919
# URL has an empty authority section, so the path begins on the third
2020
# character.
@@ -25,19 +25,14 @@ def url2pathname(url):
2525
if url[:3] == '///':
2626
# Skip past extra slash before UNC drive in URL path.
2727
url = url[1:]
28-
# Windows itself uses ":" even in URLs.
29-
url = url.replace(':', '|')
30-
if not '|' in url:
31-
# No drive specifier, just convert slashes
32-
# make sure not to convert quoted slashes :-)
33-
return urllib.parse.unquote(url.replace('/', '\\'))
34-
comp = url.split('|')
35-
if len(comp) != 2 or comp[0][-1] not in string.ascii_letters:
36-
error = 'Bad URL: ' + url
37-
raise OSError(error)
38-
drive = comp[0][-1]
39-
tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
40-
return drive + ':' + tail
28+
else:
29+
if url[:1] == '/' and url[2:3] in (':', '|'):
30+
# Skip past extra slash before DOS drive in URL path.
31+
url = url[1:]
32+
if url[1:2] == '|':
33+
# Older URLs use a pipe after a drive letter
34+
url = url[:1] + ':' + url[2:]
35+
return urllib.parse.unquote(url.replace('/', '\\'))
4136

4237
def pathname2url(p):
4338
"""OS-specific conversion from a file system path to a relative URL

Diff for: ‎Lib/test/test_urllib.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,7 @@ def test_pathname2url_nonascii(self):
14841484
'test specific to Windows pathnames.')
14851485
def test_url2pathname_win(self):
14861486
fn = urllib.request.url2pathname
1487+
self.assertEqual(fn('/'), '\\')
14871488
self.assertEqual(fn('/C:/'), 'C:\\')
14881489
self.assertEqual(fn("///C|"), 'C:')
14891490
self.assertEqual(fn("///C:"), 'C:')
@@ -1502,8 +1503,10 @@ def test_url2pathname_win(self):
15021503
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
15031504
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
15041505
self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
1505-
# Non-ASCII drive letter
1506-
self.assertRaises(IOError, fn, "///\u00e8|/")
1506+
# Colons in URI
1507+
self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
1508+
self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
1509+
self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
15071510
# UNC paths
15081511
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
15091512
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix issue where :func:`urllib.request.url2pathname` raised :exc:`OSError`
2+
when given a Windows URI containing a colon character not following a drive
3+
letter, such as before an NTFS alternate data stream.

0 commit comments

Comments
 (0)