mirror of
https://github.com/python/cpython.git
synced 2026-06-18 23:53:28 +00:00
[3.13] gh-149486: tarfile.data_filter: validate written link target (GH-149487) (GH-149555)
* gh-149486: tarfile.data_filter: validate written link target (GH-149487)
The data filter rewrote linknames with normpath() but ran the
containment check against the un-normalised value, and computed a
symlink's directory before stripping trailing slashes. Both let a
crafted archive create links pointing outside the destination. Also
reject link members that resolve to the destination directory itself,
which could otherwise replace it with a symlink and redirect all
subsequent members.
(Patch by Greg; Petr's just reviewing & merging.)
(cherry picked from commit 578411982c)
Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
parent
6ab65b3e7e
commit
0478bd83d8
3 changed files with 133 additions and 39 deletions
|
|
@ -819,16 +819,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
|
|||
if member.islnk() or member.issym():
|
||||
if os.path.isabs(member.linkname):
|
||||
raise AbsoluteLinkError(member)
|
||||
# A link member that resolves to the destination directory itself
|
||||
# would replace it with a (sym)link, redirecting the destination
|
||||
# for all subsequent members.
|
||||
if target_path == dest_path:
|
||||
raise OutsideDestinationError(member, target_path)
|
||||
normalized = os.path.normpath(member.linkname)
|
||||
if normalized != member.linkname:
|
||||
new_attrs['linkname'] = normalized
|
||||
if member.issym():
|
||||
target_path = os.path.join(dest_path,
|
||||
os.path.dirname(name),
|
||||
member.linkname)
|
||||
# The symlink is created at `name` with trailing separators
|
||||
# stripped, so its target is relative to the directory
|
||||
# containing that path.
|
||||
link_dir = os.path.dirname(name.rstrip('/' + os.sep))
|
||||
target_path = os.path.join(dest_path, link_dir, normalized)
|
||||
else:
|
||||
target_path = os.path.join(dest_path,
|
||||
member.linkname)
|
||||
target_path = os.path.join(dest_path, normalized)
|
||||
target_path = os.path.realpath(target_path,
|
||||
strict=os.path.ALLOW_MISSING)
|
||||
if os.path.commonpath([target_path, dest_path]) != dest_path:
|
||||
|
|
|
|||
|
|
@ -3649,6 +3649,39 @@ class TestExtractionFilters(unittest.TestCase):
|
|||
# The destination for the extraction, within `outerdir`
|
||||
destdir = outerdir / 'dest'
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
# Posix and Windows have different pathname resolution:
|
||||
# either symlink or a '..' component resolve first.
|
||||
# Let's see which we are on.
|
||||
if os_helper.can_symlink():
|
||||
testpath = os.path.join(TEMPDIR, 'resolution_test')
|
||||
os.mkdir(testpath)
|
||||
|
||||
# testpath/current links to `.` which is all of:
|
||||
# - `testpath`
|
||||
# - `testpath/current`
|
||||
# - `testpath/current/current`
|
||||
# - etc.
|
||||
os.symlink('.', os.path.join(testpath, 'current'))
|
||||
|
||||
# we'll test where `testpath/current/../file` ends up
|
||||
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
|
||||
pass
|
||||
|
||||
if os.path.exists(os.path.join(testpath, 'file')):
|
||||
# Windows collapses 'current\..' to '.' first, leaving
|
||||
# 'testpath\file'
|
||||
cls.dotdot_resolves_early = True
|
||||
elif os.path.exists(os.path.join(testpath, '..', 'file')):
|
||||
# Posix resolves 'current' to '.' first, leaving
|
||||
# 'testpath/../file'
|
||||
cls.dotdot_resolves_early = False
|
||||
else:
|
||||
raise AssertionError('Could not determine link resolution')
|
||||
else:
|
||||
cls.dotdot_resolves_early = False
|
||||
|
||||
@contextmanager
|
||||
def check_context(self, tar, filter, *, check_flag=True):
|
||||
"""Extracts `tar` to `self.destdir` and allows checking the result
|
||||
|
|
@ -3820,10 +3853,19 @@ def test_parent_symlink(self):
|
|||
+ "which is outside the destination")
|
||||
|
||||
with self.check_context(arc.open(), 'data'):
|
||||
self.expect_exception(
|
||||
tarfile.LinkOutsideDestinationError,
|
||||
"""'parent' would link to ['"].*outerdir['"], """
|
||||
+ "which is outside the destination")
|
||||
if self.dotdot_resolves_early:
|
||||
# 'current/../..' normalises to '..', which is rejected.
|
||||
self.expect_exception(
|
||||
tarfile.LinkOutsideDestinationError,
|
||||
"""'parent' would link to ['"].*outerdir['"], """
|
||||
+ "which is outside the destination")
|
||||
else:
|
||||
# 'current/..' normalises to '.'; the rewritten link is
|
||||
# created and 'parent/evil' lands harmlessly inside the
|
||||
# destination.
|
||||
self.expect_file('current', symlink_to='.')
|
||||
self.expect_file('parent', symlink_to='.')
|
||||
self.expect_file('evil')
|
||||
|
||||
else:
|
||||
# No symlink support. The symlinks are ignored.
|
||||
|
|
@ -3913,35 +3955,6 @@ def test_parent_symlink2(self):
|
|||
# Test interplaying symlinks
|
||||
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
|
||||
|
||||
# Posix and Windows have different pathname resolution:
|
||||
# either symlink or a '..' component resolve first.
|
||||
# Let's see which we are on.
|
||||
if os_helper.can_symlink():
|
||||
testpath = os.path.join(TEMPDIR, 'resolution_test')
|
||||
os.mkdir(testpath)
|
||||
|
||||
# testpath/current links to `.` which is all of:
|
||||
# - `testpath`
|
||||
# - `testpath/current`
|
||||
# - `testpath/current/current`
|
||||
# - etc.
|
||||
os.symlink('.', os.path.join(testpath, 'current'))
|
||||
|
||||
# we'll test where `testpath/current/../file` ends up
|
||||
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
|
||||
pass
|
||||
|
||||
if os.path.exists(os.path.join(testpath, 'file')):
|
||||
# Windows collapses 'current\..' to '.' first, leaving
|
||||
# 'testpath\file'
|
||||
dotdot_resolves_early = True
|
||||
elif os.path.exists(os.path.join(testpath, '..', 'file')):
|
||||
# Posix resolves 'current' to '.' first, leaving
|
||||
# 'testpath/../file'
|
||||
dotdot_resolves_early = False
|
||||
else:
|
||||
raise AssertionError('Could not determine link resolution')
|
||||
|
||||
with ArchiveMaker() as arc:
|
||||
|
||||
# `current` links to `.` which is both the destination directory
|
||||
|
|
@ -3977,7 +3990,7 @@ def test_parent_symlink2(self):
|
|||
|
||||
with self.check_context(arc.open(), 'data'):
|
||||
if os_helper.can_symlink():
|
||||
if dotdot_resolves_early:
|
||||
if self.dotdot_resolves_early:
|
||||
# Fail when extracting a file outside destination
|
||||
self.expect_exception(
|
||||
tarfile.OutsideDestinationError,
|
||||
|
|
@ -4097,6 +4110,76 @@ def test_sly_relative2(self):
|
|||
+ """['"].*moo['"], which is outside the """
|
||||
+ "destination")
|
||||
|
||||
@symlink_test
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_normpath_realpath_mismatch(self):
|
||||
# The link-target check must validate the value that will actually
|
||||
# be written to disk (the normalised linkname), not the original.
|
||||
# Here 'a' is a symlink to a deep nonexistent path, so realpath()
|
||||
# of 'a/../../...' stays inside the destination while normpath()
|
||||
# collapses 'a/..' lexically and escapes.
|
||||
depth = len(self.destdir.parts) + 5
|
||||
deep = '/'.join(f'p{i}' for i in range(depth))
|
||||
sneaky = 'a/' + '../' * depth + 'flag'
|
||||
for kind in 'symlink_to', 'hardlink_to':
|
||||
with self.subTest(kind):
|
||||
with ArchiveMaker() as arc:
|
||||
arc.add('a', symlink_to=deep)
|
||||
arc.add('escape', **{kind: sneaky})
|
||||
with self.check_context(arc.open(), 'data'):
|
||||
self.expect_exception(
|
||||
tarfile.LinkOutsideDestinationError)
|
||||
|
||||
@symlink_test
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_symlink_trailing_slash(self):
|
||||
# A trailing slash on a symlink member's name must not cause the
|
||||
# link target to be resolved relative to the wrong directory.
|
||||
with ArchiveMaker() as arc:
|
||||
t = tarfile.TarInfo('x/')
|
||||
t.type = tarfile.SYMTYPE
|
||||
t.linkname = '..'
|
||||
arc.tar_w.addfile(t)
|
||||
arc.add('x/escaped', content='hi')
|
||||
|
||||
with self.check_context(arc.open(), 'data'):
|
||||
self.expect_exception(tarfile.LinkOutsideDestinationError)
|
||||
|
||||
@symlink_test
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_link_at_destination(self):
|
||||
# A link member whose name resolves to the destination directory
|
||||
# itself must be rejected: otherwise the destination is replaced
|
||||
# by a symlink and later members can be redirected through it.
|
||||
for name in '', '.', './':
|
||||
with ArchiveMaker() as arc:
|
||||
t = tarfile.TarInfo(name)
|
||||
t.type = tarfile.SYMTYPE
|
||||
t.linkname = '.'
|
||||
arc.tar_w.addfile(t)
|
||||
|
||||
with self.check_context(arc.open(), 'data'):
|
||||
self.expect_exception(tarfile.OutsideDestinationError)
|
||||
|
||||
@symlink_test
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_empty_name_symlink_chain(self):
|
||||
# Regression test for a chain of empty-named symlinks that
|
||||
# incrementally redirects the destination outwards.
|
||||
with ArchiveMaker() as arc:
|
||||
for name, target in [('', ''), ('a/', '..'),
|
||||
('', 'dummy'), ('', 'a'),
|
||||
('b/', '..'),
|
||||
('', 'dummy'), ('', 'a/b')]:
|
||||
t = tarfile.TarInfo(name)
|
||||
t.type = tarfile.SYMTYPE
|
||||
t.linkname = target
|
||||
arc.tar_w.addfile(t)
|
||||
arc.add('escaped', content='hi')
|
||||
|
||||
with self.check_context(arc.open(), 'data'):
|
||||
self.expect_exception(tarfile.FilterError)
|
||||
|
||||
@symlink_test
|
||||
def test_deep_symlink(self):
|
||||
# Test that symlinks and hardlinks inside a directory
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
:func:`tarfile.data_filter` now validates link targets using the same
|
||||
normalised value that is written to disk, strips trailing separators from
|
||||
the member name when resolving a symlink's directory, and rejects link
|
||||
members that would replace the destination directory itself. This closes
|
||||
several path-traversal bypasses of the ``data`` extraction filter.
|
||||
Loading…
Add table
Add a link
Reference in a new issue