[3.9] gh-135034: Normalize link targets in tarfile, add os.path.realpath(strict='allow_missing') (GH-135037) (GH-135084)

Addresses CVE-2024-12718, CVE-2025-4138, CVE-2025-4330, and CVE-2025-4517.
(cherry picked from commit 3612d8f517)

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Seth Michael Larson <seth@python.org>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
T. Wouters 2025-06-03 19:02:50 +02:00 committed by GitHub
parent 24eaf53bc6
commit dd8f187d07
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 949 additions and 137 deletions

View file

@ -29,7 +29,8 @@
"ismount", "expanduser","expandvars","normpath","abspath",
"curdir","pardir","sep","pathsep","defpath","altsep",
"extsep","devnull","realpath","supports_unicode_filenames","relpath",
"samefile", "sameopenfile", "samestat", "commonpath"]
"samefile", "sameopenfile", "samestat", "commonpath",
"ALLOW_MISSING"]
def _get_bothseps(path):
if isinstance(path, bytes):
@ -532,9 +533,10 @@ def abspath(path):
from nt import _getfinalpathname, readlink as _nt_readlink
except ImportError:
# Without _getfinalpathname support, links cannot be resolved: realpath
# falls back to abspath, and the strict argument is accepted but ignored.
realpath = abspath
def realpath(path, *, strict=False):
return abspath(path)
else:
def _readlink_deep(path):
def _readlink_deep(path, ignored_error=OSError):
# These error codes indicate that we should stop reading links and
# return the path we currently have.
# 1: ERROR_INVALID_FUNCTION
@ -567,7 +569,7 @@ def _readlink_deep(path):
path = old_path
break
path = normpath(join(dirname(old_path), path))
except OSError as ex:
except ignored_error as ex:
if ex.winerror in allowed_winerror:
break
raise
@ -576,7 +578,7 @@ def _readlink_deep(path):
break
return path
def _getfinalpathname_nonstrict(path):
def _getfinalpathname_nonstrict(path, ignored_error=OSError):
# These error codes indicate that we should stop resolving the path
# and return the value we currently have.
# 1: ERROR_INVALID_FUNCTION
@ -600,17 +602,18 @@ def _getfinalpathname_nonstrict(path):
try:
path = _getfinalpathname(path)
return join(path, tail) if tail else path
except OSError as ex:
except ignored_error as ex:
if ex.winerror not in allowed_winerror:
raise
try:
# The OS could not resolve this path fully, so we attempt
# to follow the link ourselves. If we succeed, join the tail
# and return.
new_path = _readlink_deep(path)
new_path = _readlink_deep(path,
ignored_error=ignored_error)
if new_path != path:
return join(new_path, tail) if tail else new_path
except OSError:
except ignored_error:
# If we fail to readlink(), let's keep traversing
pass
path, name = split(path)
@ -641,16 +644,24 @@ def realpath(path, *, strict=False):
if normcase(path) == normcase(devnull):
return '\\\\.\\NUL'
had_prefix = path.startswith(prefix)
if strict is ALLOW_MISSING:
ignored_error = FileNotFoundError
strict = True
elif strict:
ignored_error = ()
else:
ignored_error = OSError
if not had_prefix and not isabs(path):
path = join(cwd, path)
try:
path = _getfinalpathname(path)
initial_winerror = 0
except OSError as ex:
if strict:
raise
except ignored_error as ex:
initial_winerror = ex.winerror
path = _getfinalpathname_nonstrict(path)
path = _getfinalpathname_nonstrict(path,
ignored_error=ignored_error)
# The path returned by _getfinalpathname will always start with \\?\ -
# strip off that prefix unless it was already provided on the original
# path.