This commit is contained in:
Seth Michael Larson 2026-02-05 08:06:30 +00:00 committed by GitHub
commit be97c91d31
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 47 additions and 4 deletions

View file

@ -1277,6 +1277,20 @@ def _create_pax_generic_header(cls, pax_headers, type, encoding):
@classmethod
def frombuf(cls, buf, encoding, errors):
"""Construct a TarInfo object from a 512-byte bytes object.
To support the old v7 tar format, AREGTYPE headers are
transformed to DIRTYPE headers if their name ends in '/'.
This is a thin wrapper that delegates to _frombuf() and leaves
its ``dircheck`` argument at the default of ``True``.
"""
return cls._frombuf(buf, encoding, errors)
@classmethod
def _frombuf(cls, buf, encoding, errors, *, dircheck=True):
"""Construct a TarInfo object from a 512 byte bytes object.
If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will
be normalized to ``DIRTYPE`` if the name ends in a trailing slash.
``dircheck`` must be set to ``False`` if this function is called
on a follow-up header such as ``GNUTYPE_LONGNAME``.
"""
if len(buf) == 0:
raise EmptyHeaderError("empty header")
@ -1307,7 +1321,7 @@ def frombuf(cls, buf, encoding, errors):
# Old V7 tar format represents a directory as a regular
# file with a trailing slash.
if obj.type == AREGTYPE and obj.name.endswith("/"):
if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"):
obj.type = DIRTYPE
# The old GNU sparse format occupies some of the unused
@ -1342,8 +1356,15 @@ def fromtarfile(cls, tarfile):
"""Return the next TarInfo object from TarFile object
tarfile.
"""
return cls._fromtarfile(tarfile)
@classmethod
def _fromtarfile(cls, tarfile, *, dircheck=True):
"""Read one BLOCKSIZE block from ``tarfile.fileobj`` and parse it.
The parsed object gets its ``offset`` set to the position at which
the block started, and the result of its ``_proc_member(tarfile)``
call is returned.
``dircheck`` is keyword-only and is forwarded to _frombuf().
See dircheck documentation in _frombuf().
"""
buf = tarfile.fileobj.read(BLOCKSIZE)
# NOTE(review): the two consecutive ``obj = ...`` assignments below look
# like the removed/added pair of a diff hunk; only the _frombuf() call
# forwards ``dircheck``.
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck)
# tell() is already past the block just read, so step back one block.
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
return obj._proc_member(tarfile)
@ -1401,7 +1422,7 @@ def _proc_gnulong(self, tarfile):
# Fetch the next header and process it.
try:
next = self.fromtarfile(tarfile)
next = self._fromtarfile(tarfile, dircheck=False)
except HeaderError as e:
raise SubsequentHeaderError(str(e)) from None
@ -1536,7 +1557,7 @@ def _proc_pax(self, tarfile):
# Fetch the next header.
try:
next = self.fromtarfile(tarfile)
next = self._fromtarfile(tarfile, dircheck=False)
except HeaderError as e:
raise SubsequentHeaderError(str(e)) from None

View file

@ -1234,6 +1234,25 @@ def test_longname_directory(self):
self.assertIsNotNone(tar.getmember(longdir))
self.assertIsNotNone(tar.getmember(longdir.removesuffix('/')))
def test_longname_file_not_directory(self):
# Regression test for issue #141707: a regular file with a long name
# must not be reported as a directory when the archive is read back.
buf = io.BytesIO()
with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar:
ti = tarfile.TarInfo()
ti.type = tarfile.AREGTYPE
# 103-character name whose 100th character is '/', so a name cut off
# after 100 characters would end in a slash.
ti.name = ('a' * 99) + '/' + ('b' * 3)
tar.addfile(ti)
# Snapshot of name -> type as recorded by the writing TarFile.
expected = {t.name: t.type for t in tar.getmembers()}
buf.seek(0)
# Re-read the same bytes; names and types must match what was written.
with tarfile.open(mode='r', fileobj=buf) as tar:
actual = {t.name: t.type for t in tar.getmembers()}
self.assertEqual(expected, actual)
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
subdir = "gnu"

View file

@ -1546,6 +1546,7 @@ Ashwin Ramaswami
Jeff Ramnani
Grant Ramsay
Bayard Randel
Eashwar Ranganathan
Varpu Rantala
Brodie Rao
Rémi Rampin

View file

@ -0,0 +1,2 @@
Don't change the :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing
the header that follows a GNU long name, GNU long link, or pax extended header.