gh-136065: Fix quadratic complexity in os.path.expandvars() (GH-134952)

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
This commit is contained in:
Serhiy Storchaka 2025-10-31 15:49:51 +02:00 committed by GitHub
parent 81cec22a9b
commit f029e8db62
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 96 additions and 116 deletions

View file

@ -400,17 +400,23 @@ def expanduser(path):
# XXX With COMMAND.COM you can use any characters in a variable name, # XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='. # XXX except '^|<>='.
_varpattern = r"'[^']*'?|%(%|[^%]*%?)|\$(\$|[-\w]+|\{[^}]*\}?)"
_varsub = None
_varsubb = None
def expandvars(path): def expandvars(path):
"""Expand shell variables of the forms $var, ${var} and %var%. """Expand shell variables of the forms $var, ${var} and %var%.
Unknown variables are left unchanged.""" Unknown variables are left unchanged."""
path = os.fspath(path) path = os.fspath(path)
global _varsub, _varsubb
if isinstance(path, bytes): if isinstance(path, bytes):
if b'$' not in path and b'%' not in path: if b'$' not in path and b'%' not in path:
return path return path
import string if not _varsubb:
varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii') import re
quote = b'\'' _varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
sub = _varsubb
percent = b'%' percent = b'%'
brace = b'{' brace = b'{'
rbrace = b'}' rbrace = b'}'
@ -419,94 +425,44 @@ def expandvars(path):
else: else:
if '$' not in path and '%' not in path: if '$' not in path and '%' not in path:
return path return path
import string if not _varsub:
varchars = string.ascii_letters + string.digits + '_-' import re
quote = '\'' _varsub = re.compile(_varpattern, re.ASCII).sub
sub = _varsub
percent = '%' percent = '%'
brace = '{' brace = '{'
rbrace = '}' rbrace = '}'
dollar = '$' dollar = '$'
environ = os.environ environ = os.environ
res = path[:0]
index = 0 def repl(m):
pathlen = len(path) lastindex = m.lastindex
while index < pathlen: if lastindex is None:
c = path[index:index+1] return m[0]
if c == quote: # no expansion within single quotes name = m[lastindex]
path = path[index + 1:] if lastindex == 1:
pathlen = len(path) if name == percent:
try: return name
index = path.index(c) if not name.endswith(percent):
res += c + path[:index + 1] return m[0]
except ValueError: name = name[:-1]
res += c + path
index = pathlen - 1
elif c == percent: # variable or '%'
if path[index + 1:index + 2] == percent:
res += c
index += 1
else: else:
path = path[index+1:] if name == dollar:
pathlen = len(path) return name
try: if name.startswith(brace):
index = path.index(percent) if not name.endswith(rbrace):
except ValueError: return m[0]
res += percent + path name = name[1:-1]
index = pathlen - 1
else:
var = path[:index]
try: try:
if environ is None: if environ is None:
value = os.fsencode(os.environ[os.fsdecode(var)]) return os.fsencode(os.environ[os.fsdecode(name)])
else: else:
value = environ[var] return environ[name]
except KeyError: except KeyError:
value = percent + var + percent return m[0]
res += value
elif c == dollar: # variable or '$$' return sub(repl, path)
if path[index + 1:index + 2] == dollar:
res += c
index += 1
elif path[index + 1:index + 2] == brace:
path = path[index+2:]
pathlen = len(path)
try:
index = path.index(rbrace)
except ValueError:
res += dollar + brace + path
index = pathlen - 1
else:
var = path[:index]
try:
if environ is None:
value = os.fsencode(os.environ[os.fsdecode(var)])
else:
value = environ[var]
except KeyError:
value = dollar + brace + var + rbrace
res += value
else:
var = path[:0]
index += 1
c = path[index:index + 1]
while c and c in varchars:
var += c
index += 1
c = path[index:index + 1]
try:
if environ is None:
value = os.fsencode(os.environ[os.fsdecode(var)])
else:
value = environ[var]
except KeyError:
value = dollar + var
res += value
if c:
index -= 1
else:
res += c
index += 1
return res
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B. # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.

View file

@ -285,42 +285,41 @@ def expanduser(path):
# This expands the forms $variable and ${variable} only. # This expands the forms $variable and ${variable} only.
# Non-existent variables are left unchanged. # Non-existent variables are left unchanged.
_varprog = None _varpattern = r'\$(\w+|\{[^}]*\}?)'
_varprogb = None _varsub = None
_varsubb = None
def expandvars(path): def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables """Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged.""" are left unchanged."""
path = os.fspath(path) path = os.fspath(path)
global _varprog, _varprogb global _varsub, _varsubb
if isinstance(path, bytes): if isinstance(path, bytes):
if b'$' not in path: if b'$' not in path:
return path return path
if not _varprogb: if not _varsubb:
import re import re
_varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII) _varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
search = _varprogb.search sub = _varsubb
start = b'{' start = b'{'
end = b'}' end = b'}'
environ = getattr(os, 'environb', None) environ = getattr(os, 'environb', None)
else: else:
if '$' not in path: if '$' not in path:
return path return path
if not _varprog: if not _varsub:
import re import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) _varsub = re.compile(_varpattern, re.ASCII).sub
search = _varprog.search sub = _varsub
start = '{' start = '{'
end = '}' end = '}'
environ = os.environ environ = os.environ
i = 0
while True: def repl(m):
m = search(path, i) name = m[1]
if not m: if name.startswith(start):
break if not name.endswith(end):
i, j = m.span(0) return m[0]
name = m.group(1)
if name.startswith(start) and name.endswith(end):
name = name[1:-1] name = name[1:-1]
try: try:
if environ is None: if environ is None:
@ -328,13 +327,11 @@ def expandvars(path):
else: else:
value = environ[name] value = environ[name]
except KeyError: except KeyError:
i = j return m[0]
else: else:
tail = path[j:] return value
path = path[:i] + value
i = len(path) return sub(repl, path)
path += tail
return path
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.

View file

@ -9,9 +9,9 @@
import sys import sys
import unittest import unittest
import warnings import warnings
from test.support import ( from test import support
is_apple, os_helper, warnings_helper from test.support import os_helper
) from test.support import warnings_helper
from test.support.script_helper import assert_python_ok from test.support.script_helper import assert_python_ok
from test.support.os_helper import FakePath from test.support.os_helper import FakePath
@ -462,6 +462,19 @@ def check(value, expected):
os.fsencode('$bar%s bar' % nonascii)) os.fsencode('$bar%s bar' % nonascii))
check(b'$spam}bar', os.fsencode('%s}bar' % nonascii)) check(b'$spam}bar', os.fsencode('%s}bar' % nonascii))
@support.requires_resource('cpu')
def test_expandvars_large(self):
    """Check expandvars() on very long inputs built from repeated units.

    Gated behind the 'cpu' resource because every input is hundreds of
    thousands of characters long.
    """
    expand = self.pathmodule.expandvars
    repeat = 100_000
    # (repeated input unit, repeated expected unit, repetition count)
    cases = (
        ('$A', 'B', repeat),
        ('${A}', 'B', repeat),
        ('$A!', 'B!', repeat),
        ('${A}A', 'BA', repeat),
        # Unterminated braces must come back unchanged.
        ('${', '${', 10 * repeat),
    )
    with os_helper.EnvironmentVarGuard() as environment:
        # Start from an empty environment so only "A" can be expanded.
        environment.clear()
        environment["A"] = "B"
        for unit, expected_unit, count in cases:
            self.assertEqual(expand(unit * count), expected_unit * count)
def test_abspath(self): def test_abspath(self):
self.assertIn("foo", self.pathmodule.abspath("foo")) self.assertIn("foo", self.pathmodule.abspath("foo"))
with warnings.catch_warnings(): with warnings.catch_warnings():
@ -519,7 +532,7 @@ def test_nonascii_abspath(self):
# directory (when the bytes name is used). # directory (when the bytes name is used).
and sys.platform not in { and sys.platform not in {
"win32", "emscripten", "wasi" "win32", "emscripten", "wasi"
} and not is_apple } and not support.is_apple
): ):
name = os_helper.TESTFN_UNDECODABLE name = os_helper.TESTFN_UNDECODABLE
elif os_helper.TESTFN_NONASCII: elif os_helper.TESTFN_NONASCII:

View file

@ -9,7 +9,7 @@
import warnings import warnings
from ntpath import ALL_BUT_LAST, ALLOW_MISSING from ntpath import ALL_BUT_LAST, ALLOW_MISSING
from test import support from test import support
from test.support import TestFailed, cpython_only, os_helper from test.support import os_helper
from test.support.os_helper import FakePath from test.support.os_helper import FakePath
from test import test_genericpath from test import test_genericpath
from tempfile import TemporaryFile from tempfile import TemporaryFile
@ -59,7 +59,7 @@ def tester(fn, wantResult):
fn = fn.replace("\\", "\\\\") fn = fn.replace("\\", "\\\\")
gotResult = eval(fn) gotResult = eval(fn)
if wantResult != gotResult and _norm(wantResult) != _norm(gotResult): if wantResult != gotResult and _norm(wantResult) != _norm(gotResult):
raise TestFailed("%s should return: %s but returned: %s" \ raise support.TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), str(gotResult))) %(str(fn), str(wantResult), str(gotResult)))
# then with bytes # then with bytes
@ -75,7 +75,7 @@ def tester(fn, wantResult):
warnings.simplefilter("ignore", DeprecationWarning) warnings.simplefilter("ignore", DeprecationWarning)
gotResult = eval(fn) gotResult = eval(fn)
if _norm(wantResult) != _norm(gotResult): if _norm(wantResult) != _norm(gotResult):
raise TestFailed("%s should return: %s but returned: %s" \ raise support.TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), repr(gotResult))) %(str(fn), str(wantResult), repr(gotResult)))
@ -1133,6 +1133,19 @@ def check(value, expected):
check('%spam%bar', '%sbar' % nonascii) check('%spam%bar', '%sbar' % nonascii)
check('%{}%bar'.format(nonascii), 'ham%sbar' % nonascii) check('%{}%bar'.format(nonascii), 'ham%sbar' % nonascii)
@support.requires_resource('cpu')
def test_expandvars_large(self):
    """Check ntpath.expandvars() on very long inputs.

    Gated behind the 'cpu' resource because every input is hundreds of
    thousands of characters long.
    """
    expand = ntpath.expandvars
    count = 100_000
    # (input path, expected expansion)
    cases = (
        ('%A%' * count, 'B' * count),
        ('%A%A' * count, 'BA' * count),
        # Quoted (empty) sections are kept; the trailing '%%' collapses.
        ("''" * count + '%%', "''" * count + '%'),
        ("%%" * count, "%" * count),
        ("$$" * count, "$" * count),
    )
    with os_helper.EnvironmentVarGuard() as environment:
        # Start from an empty environment so only "A" can be expanded.
        environment.clear()
        environment["A"] = "B"
        for source, expected in cases:
            self.assertEqual(expand(source), expected)
def test_expanduser(self): def test_expanduser(self):
tester('ntpath.expanduser("test")', 'test') tester('ntpath.expanduser("test")', 'test')
@ -1550,7 +1563,7 @@ def test_con_device(self):
self.assertTrue(os.path.exists(r"\\.\CON")) self.assertTrue(os.path.exists(r"\\.\CON"))
@unittest.skipIf(sys.platform != 'win32', "Fast paths are only for win32") @unittest.skipIf(sys.platform != 'win32', "Fast paths are only for win32")
@cpython_only @support.cpython_only
def test_fast_paths_in_use(self): def test_fast_paths_in_use(self):
# There are fast paths of these functions implemented in posixmodule.c. # There are fast paths of these functions implemented in posixmodule.c.
# Confirm that they are being used, and not the Python fallbacks in # Confirm that they are being used, and not the Python fallbacks in

View file

@ -0,0 +1 @@
Fix quadratic complexity in :func:`os.path.expandvars`.