GH-126367: url2pathname(): handle NTFS alternate data streams (#131428)

Adjust `url2pathname()` to accept colon characters that do not follow a
drive letter in Windows URIs (for example, the colon before an NTFS alternate
data stream name), rather than bailing out with an `OSError`.
This commit is contained in:
Barney Gale 2025-03-18 23:37:12 +00:00 committed by GitHub
parent 01b5abbc53
commit d783d7b51d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 20 additions and 17 deletions

View file

@ -182,7 +182,9 @@ The :mod:`urllib.request` module defines the following functions:
'C:\\Program Files' 'C:\\Program Files'
.. versionchanged:: 3.14 .. versionchanged:: 3.14
Windows drive letters are no longer converted to uppercase. Windows drive letters are no longer converted to uppercase, and ``:``
characters not following a drive letter no longer cause an
:exc:`OSError` exception to be raised on Windows.
.. function:: getproxies() .. function:: getproxies()

View file

@ -14,7 +14,7 @@ def url2pathname(url):
# ///C:/foo/bar/spam.foo # ///C:/foo/bar/spam.foo
# become # become
# C:\foo\bar\spam.foo # C:\foo\bar\spam.foo
import string, urllib.parse import urllib.parse
if url[:3] == '///': if url[:3] == '///':
# URL has an empty authority section, so the path begins on the third # URL has an empty authority section, so the path begins on the third
# character. # character.
@ -25,19 +25,14 @@ def url2pathname(url):
if url[:3] == '///': if url[:3] == '///':
# Skip past extra slash before UNC drive in URL path. # Skip past extra slash before UNC drive in URL path.
url = url[1:] url = url[1:]
# Windows itself uses ":" even in URLs. else:
url = url.replace(':', '|') if url[:1] == '/' and url[2:3] in (':', '|'):
if not '|' in url: # Skip past extra slash before DOS drive in URL path.
# No drive specifier, just convert slashes url = url[1:]
# make sure not to convert quoted slashes :-) if url[1:2] == '|':
return urllib.parse.unquote(url.replace('/', '\\')) # Older URLs use a pipe after a drive letter
comp = url.split('|') url = url[:1] + ':' + url[2:]
if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: return urllib.parse.unquote(url.replace('/', '\\'))
error = 'Bad URL: ' + url
raise OSError(error)
drive = comp[0][-1]
tail = urllib.parse.unquote(comp[1].replace('/', '\\'))
return drive + ':' + tail
def pathname2url(p): def pathname2url(p):
"""OS-specific conversion from a file system path to a relative URL """OS-specific conversion from a file system path to a relative URL

View file

@ -1484,6 +1484,7 @@ def test_pathname2url_nonascii(self):
'test specific to Windows pathnames.') 'test specific to Windows pathnames.')
def test_url2pathname_win(self): def test_url2pathname_win(self):
fn = urllib.request.url2pathname fn = urllib.request.url2pathname
self.assertEqual(fn('/'), '\\')
self.assertEqual(fn('/C:/'), 'C:\\') self.assertEqual(fn('/C:/'), 'C:\\')
self.assertEqual(fn("///C|"), 'C:') self.assertEqual(fn("///C|"), 'C:')
self.assertEqual(fn("///C:"), 'C:') self.assertEqual(fn("///C:"), 'C:')
@ -1502,8 +1503,10 @@ def test_url2pathname_win(self):
self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file') self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo') self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
# Non-ASCII drive letter # Colons in URI
self.assertRaises(IOError, fn, "///\u00e8|/") self.assertEqual(fn('///\u00e8|/'), '\u00e8:\\')
self.assertEqual(fn('//host/share/spam.txt:eggs'), '\\\\host\\share\\spam.txt:eggs')
self.assertEqual(fn('///c:/spam.txt:eggs'), 'c:\\spam.txt:eggs')
# UNC paths # UNC paths
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file') self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')

View file

@ -0,0 +1,3 @@
Fix an issue where :func:`urllib.request.url2pathname` raised :exc:`OSError`
when given a Windows URI containing a colon character that does not follow a
drive letter, such as the colon preceding an NTFS alternate data stream name.