mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
Sync with danny0838/zipremove@1843d87b70
This commit is contained in:
parent
11c09378b2
commit
4b2176e890
3 changed files with 54 additions and 56 deletions
|
|
@ -527,7 +527,7 @@ ZipFile Objects
|
|||
a path is provided.
|
||||
|
||||
This does not physically remove the local file entry from the archive.
|
||||
Call :meth:`ZipFile.repack` afterwards to reclaim space.
|
||||
Call :meth:`repack` afterwards to reclaim space.
|
||||
|
||||
The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``.
|
||||
|
||||
|
|
|
|||
|
|
@ -1362,8 +1362,11 @@ class ZstdWriterTests(AbstractWriterTests, unittest.TestCase):
|
|||
compression = zipfile.ZIP_ZSTANDARD
|
||||
|
||||
|
||||
class ComparableZipInfo:
    """Build a plain-dict snapshot of a ``ZipInfo`` for use in assertions.

    Calling ``ComparableZipInfo(zinfo)`` does not create an instance of this
    class: ``__new__`` returns a ``dict`` that maps every public ``ZipInfo``
    slot name to its current value on *zinfo*.  Two snapshots compare equal
    exactly when all of those public fields match.
    """

    # Public slot names only; underscore-prefixed slots are left out so they
    # never take part in the comparison.
    keys = [i for i in zipfile.ZipInfo.__slots__ if not i.startswith('_')]

    def __new__(cls, zinfo):
        # A slot that has never been assigned raises AttributeError on
        # access; report it as None so a snapshot can still be built (and
        # compared) for such an object.
        return {i: getattr(zinfo, i, None) for i in cls.keys}
|
||||
|
||||
_struct_pack = struct.pack
|
||||
|
||||
|
|
@ -1379,6 +1382,8 @@ def struct_pack_no_dd_sig(fmt, *values):
|
|||
|
||||
class RepackHelperMixin:
|
||||
"""Common helpers for remove and repack."""
|
||||
maxDiff = 8192
|
||||
|
||||
@classmethod
|
||||
def _prepare_test_files(cls):
|
||||
return [
|
||||
|
|
@ -1389,14 +1394,11 @@ def _prepare_test_files(cls):
|
|||
|
||||
@classmethod
def _prepare_zip_from_test_files(cls, zfname, test_files, force_zip64=False):
    """Create a ZIP archive from *test_files* and return its entries.

    zfname: passed as the first argument to ``zipfile.ZipFile``.
    test_files: iterable of ``(name, data)`` pairs, written in order.
    force_zip64: forwarded to ``ZipFile.open`` for every member.

    The archive is opened in mode ``'w'`` with ``cls.compression``.
    Returns a new list holding the archive's ``ZipInfo`` objects (a copy of
    ``infolist()``).  The entries are returned unwrapped; callers that need
    value comparison wrap them in ``ComparableZipInfo`` themselves.
    """
    with zipfile.ZipFile(zfname, 'w', cls.compression) as zh:
        for file, data in test_files:
            with zh.open(file, 'w', force_zip64=force_zip64) as fh:
                fh.write(data)
        return list(zh.infolist())
|
||||
|
||||
class AbstractRemoveTests(RepackHelperMixin):
|
||||
@classmethod
|
||||
|
|
@ -1416,7 +1418,7 @@ def test_remove_by_name(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zi for j, zi in enumerate(zinfos) if j != i],
|
||||
[ComparableZipInfo(zi) for j, zi in enumerate(zinfos) if j != i],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1437,7 +1439,7 @@ def test_remove_by_zinfo(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zi for j, zi in enumerate(zinfos) if j != i],
|
||||
[ComparableZipInfo(zi) for j, zi in enumerate(zinfos) if j != i],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1478,13 +1480,13 @@ def test_remove_by_name_duplicated(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[0], zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[0], zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
self.assertEqual(
|
||||
ComparableZipInfo(zh.getinfo('file.txt')),
|
||||
zinfos[0],
|
||||
ComparableZipInfo(zinfos[0]),
|
||||
)
|
||||
|
||||
# make sure the zip file is still valid
|
||||
|
|
@ -1499,7 +1501,7 @@ def test_remove_by_name_duplicated(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1528,13 +1530,13 @@ def test_remove_by_zinfo_duplicated(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[1], zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[1], zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
self.assertEqual(
|
||||
ComparableZipInfo(zh.getinfo('file.txt')),
|
||||
zinfos[1],
|
||||
ComparableZipInfo(zinfos[1]),
|
||||
)
|
||||
|
||||
# make sure the zip file is still valid
|
||||
|
|
@ -1548,13 +1550,13 @@ def test_remove_by_zinfo_duplicated(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[0], zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[0], zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
self.assertEqual(
|
||||
ComparableZipInfo(zh.getinfo('file.txt')),
|
||||
zinfos[0],
|
||||
ComparableZipInfo(zinfos[0]),
|
||||
)
|
||||
|
||||
# make sure the zip file is still valid
|
||||
|
|
@ -1570,7 +1572,7 @@ def test_remove_by_zinfo_duplicated(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1591,7 +1593,7 @@ def test_remove_zip64(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zi for j, zi in enumerate(zinfos) if j != i],
|
||||
[ComparableZipInfo(zi) for j, zi in enumerate(zinfos) if j != i],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1626,14 +1628,14 @@ def test_remove_mode_w(self):
|
|||
with zipfile.ZipFile(TESTFN, 'w') as zh:
|
||||
for file, data in self.test_files:
|
||||
zh.writestr(file, data)
|
||||
zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
zinfos = list(zh.infolist())
|
||||
|
||||
zh.remove(self.test_files[0][0])
|
||||
|
||||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[1], zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[1], zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1648,14 +1650,14 @@ def test_remove_mode_x(self):
|
|||
with zipfile.ZipFile(TESTFN, 'x') as zh:
|
||||
for file, data in self.test_files:
|
||||
zh.writestr(file, data)
|
||||
zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
zinfos = list(zh.infolist())
|
||||
|
||||
zh.remove(self.test_files[0][0])
|
||||
|
||||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
[zinfos[1], zinfos[2]],
|
||||
[ComparableZipInfo(zi) for zi in [zinfos[1], zinfos[2]]],
|
||||
)
|
||||
|
||||
# check NameToInfo cache
|
||||
|
|
@ -1714,7 +1716,7 @@ def test_repack_basic(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1766,7 +1768,7 @@ def test_repack_bytes_before_first_file(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1800,7 +1802,7 @@ def test_repack_magic_before_first_file(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1846,7 +1848,7 @@ def test_repack_file_entry_before_first_file(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1856,6 +1858,7 @@ def test_repack_file_entry_before_first_file(self):
|
|||
with zipfile.ZipFile(TESTFN) as zh:
|
||||
self.assertIsNone(zh.testzip())
|
||||
|
||||
@mock.patch.object(time, 'time', new=lambda: 315504000) # fix time for ZipFile.writestr()
|
||||
def test_repack_bytes_before_removed_files(self):
|
||||
"""Should preserve if there are bytes before stale local file entries."""
|
||||
for ii in ([1], [1, 2], [2]):
|
||||
|
|
@ -1870,7 +1873,7 @@ def test_repack_bytes_before_removed_files(self):
|
|||
zh.writestr(file, data)
|
||||
for i in ii:
|
||||
zh.remove(self.test_files[i][0])
|
||||
expected_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
expected_zinfos = list(zh.infolist())
|
||||
expected_size = os.path.getsize(TESTFN)
|
||||
|
||||
# do the removal and check the result
|
||||
|
|
@ -1889,7 +1892,7 @@ def test_repack_bytes_before_removed_files(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1899,6 +1902,7 @@ def test_repack_bytes_before_removed_files(self):
|
|||
with zipfile.ZipFile(TESTFN) as zh:
|
||||
self.assertIsNone(zh.testzip())
|
||||
|
||||
@mock.patch.object(time, 'time', new=lambda: 315504000) # fix time for ZipFile.writestr()
|
||||
def test_repack_bytes_after_removed_files(self):
|
||||
"""Should keep extra bytes if there are bytes after stale local file entries."""
|
||||
for ii in ([1], [1, 2], [2]):
|
||||
|
|
@ -1912,7 +1916,7 @@ def test_repack_bytes_after_removed_files(self):
|
|||
if i == ii[-1]:
|
||||
fh.write(b' dummy bytes ')
|
||||
zh.start_dir = fh.tell()
|
||||
expected_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
expected_zinfos = list(zh.infolist())
|
||||
expected_size = os.path.getsize(TESTFN)
|
||||
|
||||
# do the removal and check the result
|
||||
|
|
@ -1931,7 +1935,7 @@ def test_repack_bytes_after_removed_files(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1941,6 +1945,7 @@ def test_repack_bytes_after_removed_files(self):
|
|||
with zipfile.ZipFile(TESTFN) as zh:
|
||||
self.assertIsNone(zh.testzip())
|
||||
|
||||
@mock.patch.object(time, 'time', new=lambda: 315504000) # fix time for ZipFile.writestr()
|
||||
def test_repack_bytes_between_removed_files(self):
|
||||
"""Should strip only local file entries before random bytes."""
|
||||
# calculate the expected results
|
||||
|
|
@ -1951,7 +1956,7 @@ def test_repack_bytes_between_removed_files(self):
|
|||
zh.start_dir = fh.tell()
|
||||
zh.writestr(*self.test_files[2])
|
||||
zh.remove(self.test_files[2][0])
|
||||
expected_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
expected_zinfos = list(zh.infolist())
|
||||
expected_size = os.path.getsize(TESTFN)
|
||||
|
||||
# do the removal and check the result
|
||||
|
|
@ -1970,7 +1975,7 @@ def test_repack_bytes_between_removed_files(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -1992,7 +1997,7 @@ def test_repack_prepended_bytes(self):
|
|||
fh.write(b'dummy ')
|
||||
fh.write(fz.read())
|
||||
with zipfile.ZipFile(TESTFN) as zh:
|
||||
expected_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
expected_zinfos = list(zh.infolist())
|
||||
expected_size = os.path.getsize(TESTFN)
|
||||
|
||||
# do the removal and check the result
|
||||
|
|
@ -2010,7 +2015,7 @@ def test_repack_prepended_bytes(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -2055,7 +2060,7 @@ def test_repack_removed_basic(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -2098,20 +2103,20 @@ def test_repack_removed_partial(self):
|
|||
with zipfile.ZipFile(TESTFN) as zh:
|
||||
self.assertIsNone(zh.testzip())
|
||||
|
||||
@mock.patch.object(time, 'time', new=lambda: 315504000) # fix time for ZipFile.writestr()
|
||||
def test_repack_removed_bytes_between_files(self):
|
||||
"""Should not remove bytes between local file entries."""
|
||||
for ii in ([0], [1], [2]):
|
||||
with self.subTest(removed=ii):
|
||||
# calculate the expected results
|
||||
expected_zinfos = []
|
||||
with open(TESTFN, 'wb') as fh:
|
||||
with zipfile.ZipFile(fh, 'w', self.compression) as zh:
|
||||
for j, (file, data) in enumerate(self.test_files):
|
||||
if j not in ii:
|
||||
zh.writestr(file, data)
|
||||
expected_zinfos.append(ComparableZipInfo(zh.getinfo(file)))
|
||||
fh.write(b' dummy bytes ')
|
||||
zh.start_dir = fh.tell()
|
||||
expected_zinfos = list(zh.infolist())
|
||||
expected_size = os.path.getsize(TESTFN)
|
||||
|
||||
# do the removal and check the result
|
||||
|
|
@ -2128,7 +2133,7 @@ def test_repack_removed_bytes_between_files(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
@ -2184,7 +2189,7 @@ def test_repack_removed_prepended_bytes(self):
|
|||
fh.write(b'dummy ')
|
||||
fh.write(fz.read())
|
||||
with zipfile.ZipFile(TESTFN) as zh:
|
||||
expected_zinfos = [ComparableZipInfo(zi) for zi in zh.infolist()]
|
||||
expected_zinfos = list(zh.infolist())
|
||||
expected_size = os.path.getsize(TESTFN)
|
||||
|
||||
# do the removal and check the result
|
||||
|
|
@ -2201,7 +2206,7 @@ def test_repack_removed_prepended_bytes(self):
|
|||
# check infolist
|
||||
self.assertEqual(
|
||||
[ComparableZipInfo(zi) for zi in zh.infolist()],
|
||||
expected_zinfos,
|
||||
[ComparableZipInfo(zi) for zi in expected_zinfos],
|
||||
)
|
||||
|
||||
# check file size
|
||||
|
|
|
|||
|
|
@ -1389,10 +1389,9 @@ def repack(self, zfile, removed=None):
|
|||
"""
|
||||
Repack the ZIP file, stripping unreferenced local file entries.
|
||||
|
||||
Assumes that local file entries are stored consecutively, with no gaps
|
||||
or overlaps.
|
||||
|
||||
Behavior:
|
||||
Assumes that local file entries (and the central directory, which is
|
||||
mostly treated as the "last entry") are stored consecutively, with no
|
||||
gaps or overlaps:
|
||||
|
||||
1. If any referenced entry overlaps with another, a `BadZipFile` error
|
||||
is raised since safe repacking cannot be guaranteed.
|
||||
|
|
@ -1405,8 +1404,8 @@ def repack(self, zfile, removed=None):
|
|||
be a sequence of consecutive entries with no extra preceding bytes;
|
||||
extra following bytes are preserved.
|
||||
|
||||
4. This is to prevent an unexpected data removal (false positive),
|
||||
though a false negative may happen in certain rare cases.
|
||||
This is to prevent an unexpected data removal (false positive), though
|
||||
a false negative may happen in certain rare cases.
|
||||
|
||||
Examples:
|
||||
|
||||
|
|
@ -1456,8 +1455,8 @@ def repack(self, zfile, removed=None):
|
|||
- Modifies the ZIP file in place.
|
||||
- Updates zfile.start_dir to account for removed data.
|
||||
- Sets zfile._didModify to True.
|
||||
- Updates header_offset and _end_offset of referenced ZipInfo
|
||||
instances.
|
||||
- Updates header_offset and clears _end_offset of referenced
|
||||
ZipInfo instances.
|
||||
|
||||
Parameters:
|
||||
zfile: A ZipFile object representing the archive to repack.
|
||||
|
|
@ -1559,14 +1558,8 @@ def repack(self, zfile, removed=None):
|
|||
zfile.start_dir -= entry_offset
|
||||
zfile._didModify = True
|
||||
|
||||
end_offset = zfile.start_dir
|
||||
for zinfo in reversed(filelist):
|
||||
if zinfo in removed_zinfos:
|
||||
for zinfo in filelist:
|
||||
zinfo._end_offset = None
|
||||
else:
|
||||
if zinfo._end_offset is not None:
|
||||
zinfo._end_offset = end_offset
|
||||
end_offset = zinfo.header_offset
|
||||
|
||||
def _calc_initial_entry_offset(self, fp, data_offset):
|
||||
checked_offsets = {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue