mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
Merge e926a954ed into 7099af8f5e
This commit is contained in: commit 7cfc20ff91
5 changed files with 2773 additions and 0 deletions
@@ -548,6 +548,69 @@ ZipFile objects

   .. versionadded:: 3.11

.. method:: ZipFile.remove(zinfo_or_arcname)

   Removes a member entry from the archive's central directory.

   *zinfo_or_arcname* may be the full path of the member or a :class:`ZipInfo`
   instance. If multiple members share the same full path and the path is
   provided, only one of them is removed.

   The archive must be opened with mode ``'w'``, ``'x'`` or ``'a'``.

   Returns the removed :class:`ZipInfo` instance.

   Calling :meth:`remove` on a closed ZipFile will raise a :exc:`ValueError`.

   .. note::

      This method only removes the member's entry from the central directory,
      making it inaccessible to most tools. The member's local file entry,
      including content and metadata, remains in the archive and is still
      recoverable using forensic tools. Call :meth:`repack` afterwards to
      completely remove the member and reclaim space.
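
   A minimal usage sketch (the archive and member names here are
   placeholders)::

      import zipfile

      with zipfile.ZipFile('example.zip', 'a') as zh:
          zinfo = zh.remove('unwanted.txt')
          # The member's data is still physically present in the file;
          # repack() reclaims the space.
          zh.repack([zinfo])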

   .. versionadded:: next

.. method:: ZipFile.repack(removed=None, *, \
                           strict_descriptor=False[, chunk_size])

   Rewrites the archive to remove unreferenced local file entries, shrinking
   its file size. The archive must be opened with mode ``'a'``.

   If *removed* is provided, it must be a sequence of :class:`ZipInfo` objects
   representing the recently removed members, and only their corresponding
   local file entries will be removed. Otherwise, the archive is scanned to
   locate and remove local file entries that are no longer referenced in the
   central directory.

   When scanning, setting ``strict_descriptor=True`` disables detection of
   entries that use an unsigned data descriptor (a format deprecated by the
   ZIP specification since version 6.3.0, released on 2006-09-29, and used
   only by some legacy tools). Scanning such entries is significantly slower,
   around 100 to 1000 times in the worst case; entries without this feature
   are unaffected.

   *chunk_size* may be specified to control the buffer size when moving
   entry data (default is 1 MiB).

   Calling :meth:`repack` on a closed ZipFile will raise a :exc:`ValueError`.

   .. note::

      The scanning algorithm is heuristic-based and assumes that the ZIP file
      is normally structured, for example with local file entries stored
      consecutively, without overlap or interleaved binary data. Prepended
      binary data, such as a self-extractor stub, is recognized and preserved
      unless it happens to contain bytes that coincidentally resemble a valid
      local file entry in multiple respects, which is an extremely rare case.
      Embedded ZIP payloads are also handled correctly, as long as they follow
      normal structure. However, the algorithm does not guarantee correctness
      or safety on untrusted or intentionally crafted input. It is generally
      recommended to provide the *removed* argument for better reliability and
      performance.
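
   A sketch of both modes of operation (the archive and member names are
   placeholders)::

      import zipfile

      # Recommended: pass the ZipInfo objects returned by remove(), so that
      # only their local file entries are stripped.
      with zipfile.ZipFile('example.zip', 'a') as zh:
          infos = [zh.remove(name) for name in ('a.txt', 'b.txt')]
          zh.repack(infos)

      # Fallback: scan the archive for unreferenced local file entries.
      # strict_descriptor=True skips the slow scan for unsigned data
      # descriptors.
      with zipfile.ZipFile('example.zip', 'a') as zh:
          zh.repack(strict_descriptor=True)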

   .. versionadded:: next

The following data attributes are also available:

.. attribute:: ZipFile.filename

File diff suppressed because it is too large
@@ -13,12 +13,16 @@

import zipfile, unittest
import time
import tracemalloc
import sys
import unittest.mock as mock

from tempfile import TemporaryFile

from test.support import os_helper
from test.support import requires_zlib
from test.test_zipfile.test_core import Unseekable
from test.test_zipfile.test_core import struct_pack_no_dd_sig

TESTFN = os_helper.TESTFN
TESTFN2 = TESTFN + "2"

@@ -87,6 +91,174 @@ def tearDown(self):
        os_helper.unlink(TESTFN2)


class TestRepack(unittest.TestCase):
    def setUp(self):
        # Create test data.
        line_gen = ("Test of zipfile line %d." % i for i in range(1000000))
        self.data = '\n'.join(line_gen).encode('ascii')

        # It will contain enough copies of self.data to reach about 8 GiB.
        self.datacount = 8*1024**3 // len(self.data)

        # memory usage should not exceed 10 MiB
        self.allowed_memory = 10*1024**2

    def _write_large_file(self, fh):
        next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
        for num in range(self.datacount):
            fh.write(self.data)
            # Print still working message since this test can be really slow
            if next_time <= time.monotonic():
                next_time = time.monotonic() + _PRINT_WORKING_MSG_INTERVAL
                print((
                    ' writing %d of %d, be patient...' %
                    (num, self.datacount)), file=sys.__stdout__)
                sys.__stdout__.flush()

    def test_strip_removed_large_file(self):
        """Should move the physical data of a file positioned after a large
        removed file without causing a memory issue."""
        # Try the temp file. If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            tracemalloc.start()
            self._test_strip_removed_large_file(f)
            self.assertFalse(f.closed)
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            self.assertLess(peak, self.allowed_memory)

    def _test_strip_removed_large_file(self, f):
        file = 'file.txt'
        file1 = 'largefile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(f, 'w') as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)
            zh.writestr(file, data)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file1)
            zh.repack()
            self.assertIsNone(zh.testzip())

    def test_strip_removed_file_before_large_file(self):
        """Should move the physical data of a large file positioned after a
        removed file without causing a memory issue."""
        # Try the temp file. If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            tracemalloc.start()
            self._test_strip_removed_file_before_large_file(f)
            self.assertFalse(f.closed)
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            self.assertLess(peak, self.allowed_memory)

    def _test_strip_removed_file_before_large_file(self, f):
        file = 'file.txt'
        file1 = 'largefile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(f, 'w') as zh:
            zh.writestr(file, data)
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file)
            zh.repack()
            self.assertIsNone(zh.testzip())

    def test_strip_removed_large_file_with_dd(self):
        """Should scan for the data descriptor of a removed large file without
        causing a memory issue."""
        # Try the temp file. If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            tracemalloc.start()
            self._test_strip_removed_large_file_with_dd(f)
            self.assertFalse(f.closed)
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            self.assertLess(peak, self.allowed_memory)

    def _test_strip_removed_large_file_with_dd(self, f):
        file = 'file.txt'
        file1 = 'largefile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with zipfile.ZipFile(Unseekable(f), 'w') as zh:
            with zh.open(file1, 'w', force_zip64=True) as fh:
                self._write_large_file(fh)
            zh.writestr(file, data)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file1)
            zh.repack()
            self.assertIsNone(zh.testzip())

    def test_strip_removed_large_file_with_dd_no_sig(self):
        """Should scan for the data descriptor (without signature) of a removed
        large file without causing a memory issue."""
        # Reduce data scale for this test, as it's especially slow...
        self.datacount = 30*1024**2 // len(self.data)
        self.allowed_memory = 200*1024

        # Try the temp file. If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            tracemalloc.start()
            self._test_strip_removed_large_file_with_dd_no_sig(f)
            self.assertFalse(f.closed)
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            self.assertLess(peak, self.allowed_memory)

    def _test_strip_removed_large_file_with_dd_no_sig(self, f):
        file = 'file.txt'
        file1 = 'largefile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig):
            with zipfile.ZipFile(Unseekable(f), 'w') as zh:
                with zh.open(file1, 'w', force_zip64=True) as fh:
                    self._write_large_file(fh)
                zh.writestr(file, data)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file1)
            zh.repack()
            self.assertIsNone(zh.testzip())

    @requires_zlib()
    def test_strip_removed_large_file_with_dd_no_sig_by_decompression(self):
        """Should scan for the data descriptor (without signature) of a removed
        large file without causing a memory issue."""
        # Try the temp file. If we do TESTFN2, then it hogs
        # gigabytes of disk space for the duration of the test.
        with TemporaryFile() as f:
            tracemalloc.start()
            self._test_strip_removed_large_file_with_dd_no_sig_by_decompression(
                f, zipfile.ZIP_DEFLATED)
            self.assertFalse(f.closed)
            current, peak = tracemalloc.get_traced_memory()
            tracemalloc.stop()
            self.assertLess(peak, self.allowed_memory)

    def _test_strip_removed_large_file_with_dd_no_sig_by_decompression(self, f, method):
        file = 'file.txt'
        file1 = 'largefile.txt'
        data = b'Sed ut perspiciatis unde omnis iste natus error sit voluptatem'
        with mock.patch('zipfile.struct.pack', side_effect=struct_pack_no_dd_sig):
            with zipfile.ZipFile(Unseekable(f), 'w', compression=method) as zh:
                with zh.open(file1, 'w', force_zip64=True) as fh:
                    self._write_large_file(fh)
                zh.writestr(file, data)

        with zipfile.ZipFile(f, 'a') as zh:
            zh.remove(file1)
            zh.repack()
            self.assertIsNone(zh.testzip())


class OtherTests(unittest.TestCase):
    def testMoreThan64kFiles(self):
        # This test checks that more than 64k files can be added to an archive,

@@ -778,6 +778,13 @@ def __init__(self):
        self._unconsumed = b''
        self.eof = False

    @property
    def unused_data(self):
        # Expose any bytes past the end of the compressed stream, mirroring
        # the underlying decompressor; fall back to b'' when unavailable.
        try:
            return self._decomp.unused_data
        except AttributeError:
            return b''

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data

@@ -1380,6 +1387,461 @@ def close(self):
        self._zipfile._writing = False


class _ZipRepacker:
    """Class for ZipFile repacking."""
    def __init__(self, *, strict_descriptor=False, chunk_size=2**20, debug=0):
        self.debug = debug  # Level of printing: 0 through 3
        self.chunk_size = chunk_size
        self.strict_descriptor = strict_descriptor

    def _debug(self, level, *msg):
        if self.debug >= level:
            print(*msg)

    def repack(self, zfile, removed=None):
        """
        Repack the ZIP file, stripping unreferenced local file entries.

        Assumes that local file entries (and the central directory, which is
        mostly treated as the "last entry") are stored consecutively, with no
        gaps or overlaps:

        1. If any referenced entry overlaps with another, a `BadZipFile` error
           is raised since safe repacking cannot be guaranteed.

        2. Data before the first referenced entry is stripped only when it
           appears to be a sequence of consecutive entries with no extra
           following bytes; extra preceding bytes are preserved.

        3. Data between referenced entries is stripped only when it appears to
           be a sequence of consecutive entries with no extra preceding bytes;
           extra following bytes are preserved.

        This is to prevent an unexpected data removal (false positive), though
        a false negative may happen in certain rare cases.

        Examples:

            Stripping before the first referenced entry:

                [random bytes]
                [unreferenced local file entry]
                [random bytes]
                <-- stripping start
                [unreferenced local file entry]
                [unreferenced local file entry]
                <-- stripping end
                [local file entry 1] (or central directory)
                ...

            Stripping between referenced entries:

                ...
                [local file entry]
                <-- stripping start
                [unreferenced local file entry]
                [unreferenced local file entry]
                <-- stripping end
                [random bytes]
                [unreferenced local file entry]
                [random bytes]
                [local file entry] (or central directory)
                ...

            No stripping:

                [unreferenced local file entry]
                [random bytes]
                [local file entry 1] (or central directory)
                ...

            No stripping:

                ...
                [local file entry]
                [random bytes]
                [unreferenced local file entry]
                [local file entry] (or central directory)
                ...

        Side effects:
            - Modifies the ZIP file in place.
            - Updates zfile.start_dir to account for removed data.
            - Sets zfile._didModify to True.
            - Updates header_offset and clears _end_offset of referenced
              ZipInfo instances.

        Parameters:
            zfile: A ZipFile object representing the archive to repack.
            removed: Optional. A sequence of ZipInfo instances representing
                the previously removed entries. When provided, only their
                corresponding local file entries are stripped.
        """
        removed_zinfos = set(removed or ())

        fp = zfile.fp

        # get a sorted filelist by header offset, in case the dir order
        # doesn't match the actual entry order
        filelist = (*zfile.filelist, *removed_zinfos)
        filelist = sorted(filelist, key=lambda x: x.header_offset)

        # calculate each entry size and validate
        entry_size_list = []
        used_entry_size_list = []
        for i, zinfo in enumerate(filelist):
            try:
                offset = filelist[i + 1].header_offset
            except IndexError:
                offset = zfile.start_dir
            entry_size = offset - zinfo.header_offset

            # may raise on an invalid local file header
            used_entry_size = self._calc_local_file_entry_size(fp, zinfo)

            self._debug(3, 'entry:', i, zinfo.orig_filename,
                        zinfo.header_offset, entry_size, used_entry_size)
            if used_entry_size > entry_size:
                raise BadZipFile(
                    f"Overlapped entries: {zinfo.orig_filename!r} ")

            if removed is not None and zinfo not in removed_zinfos:
                used_entry_size = entry_size

            entry_size_list.append(entry_size)
            used_entry_size_list.append(used_entry_size)

        # calculate the starting entry offset (bytes to skip)
        if removed is None:
            try:
                offset = filelist[0].header_offset
            except IndexError:
                offset = zfile.start_dir
            entry_offset = self._calc_initial_entry_offset(fp, offset)
        else:
            entry_offset = 0

        # move file entries
        for i, zinfo in enumerate(filelist):
            entry_size = entry_size_list[i]
            used_entry_size = used_entry_size_list[i]

            # update the header and move entry data to the new position
            old_header_offset = zinfo.header_offset
            zinfo.header_offset -= entry_offset

            if zinfo in removed_zinfos:
                self._copy_bytes(
                    fp,
                    old_header_offset + used_entry_size,
                    zinfo.header_offset,
                    entry_size - used_entry_size,
                )

                # update entry_offset for subsequent files to follow
                entry_offset += used_entry_size

            else:
                if entry_offset > 0:
                    self._copy_bytes(
                        fp,
                        old_header_offset,
                        zinfo.header_offset,
                        used_entry_size,
                    )

                stale_entry_size = self._validate_local_file_entry_sequence(
                    fp,
                    old_header_offset + used_entry_size,
                    old_header_offset + entry_size,
                )

                if stale_entry_size > 0:
                    self._copy_bytes(
                        fp,
                        old_header_offset + used_entry_size + stale_entry_size,
                        zinfo.header_offset + used_entry_size,
                        entry_size - used_entry_size - stale_entry_size,
                    )

                # update entry_offset for subsequent files to follow
                entry_offset += stale_entry_size

        # update state
        zfile.start_dir -= entry_offset
        zfile._didModify = True

        for zinfo in filelist:
            zinfo._end_offset = None

    def _calc_initial_entry_offset(self, fp, data_offset):
        checked_offsets = {}
        if data_offset > 0:
            self._debug(3, 'scanning file signatures before:', data_offset)
            for pos in self._iter_scan_signature(fp, stringFileHeader, 0, data_offset):
                self._debug(3, 'checking file signature at:', pos)
                entry_size = self._validate_local_file_entry_sequence(
                    fp, pos, data_offset, checked_offsets)
                if entry_size == data_offset - pos:
                    return entry_size
        return 0

    def _iter_scan_signature(self, fp, signature, start_offset, end_offset,
                             chunk_size=io.DEFAULT_BUFFER_SIZE):
        sig_len = len(signature)
        remainder = b''
        pos = start_offset

        while pos < end_offset:
            # required for each loop since fp may be changed during each yield
            fp.seek(pos)

            chunk = remainder + fp.read(min(chunk_size, end_offset - pos))

            delta = pos - len(remainder)
            idx = 0
            while True:
                idx = chunk.find(signature, idx)
                if idx == -1:
                    break

                yield delta + idx
                idx += 1

            remainder = chunk[-(sig_len - 1):]
            pos += chunk_size

    def _validate_local_file_entry_sequence(self, fp, start_offset, end_offset, checked_offsets=None):
        offset = start_offset

        while offset < end_offset:
            self._debug(3, 'checking local file entry at:', offset)

            # Cache checked offsets to improve performance.
            try:
                entry_size = checked_offsets[offset]
            except (KeyError, TypeError):
                entry_size = self._validate_local_file_entry(fp, offset, end_offset)
                if checked_offsets is not None:
                    checked_offsets[offset] = entry_size
            else:
                self._debug(3, 'read from checked cache:', offset)

            if entry_size is None:
                break

            offset += entry_size

        return offset - start_offset

    def _validate_local_file_entry(self, fp, offset, end_offset):
        fp.seek(offset)
        try:
            fheader = self._read_local_file_header(fp)
        except BadZipFile:
            return None

        # Create a dummy ZipInfo to utilize parsing.
        # Flush only the required information.
        zinfo = ZipInfo()
        zinfo.header_offset = offset
        zinfo.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
        zinfo.compress_size = fheader[_FH_COMPRESSED_SIZE]
        zinfo.file_size = fheader[_FH_UNCOMPRESSED_SIZE]
        zinfo.CRC = fheader[_FH_CRC]

        filename = fp.read(fheader[_FH_FILENAME_LENGTH])
        zinfo.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
        pos = fp.tell()

        if pos > end_offset:
            return None

        # parse zip64
        try:
            zinfo._decodeExtra(crc32(filename))
        except BadZipFile:
            return None

        dd_size = 0

        if zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
            # According to the spec, these fields should be zero when data
            # descriptor is used. Otherwise treat as a false positive on
            # random bytes to return early, as scanning for data descriptor
            # is rather expensive.
            if not (zinfo.CRC == zinfo.compress_size == zinfo.file_size == 0):
                return None

            zip64 = fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff

            dd = self._scan_data_descriptor(fp, pos, end_offset, zip64)
            if dd is None and not self.strict_descriptor:
                if zinfo.flag_bits & _MASK_ENCRYPTED:
                    dd = False
                else:
                    dd = self._scan_data_descriptor_no_sig_by_decompression(
                        fp, pos, end_offset, zip64, fheader[_FH_COMPRESSION_METHOD])
                if dd is False:
                    dd = self._scan_data_descriptor_no_sig(fp, pos, end_offset, zip64)
            if dd is None:
                return None

            zinfo.CRC, zinfo.compress_size, zinfo.file_size, dd_size = dd

        return (
            sizeFileHeader +
            fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
            zinfo.compress_size +
            dd_size
        )

    def _read_local_file_header(self, fp):
        fheader = fp.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        return fheader

    def _scan_data_descriptor(self, fp, offset, end_offset, zip64):
        dd_fmt = '<LLQQ' if zip64 else '<LLLL'
        dd_size = struct.calcsize(dd_fmt)

        # scan for signature and take the first valid descriptor
        for pos in self._iter_scan_signature(
            fp, struct.pack('<L', _DD_SIGNATURE), offset, end_offset
        ):
            fp.seek(pos)
            dd = fp.read(min(dd_size, end_offset - pos))
            try:
                _, crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
            except struct.error:
                continue

            # @TODO: also check CRC to better guard from a false positive?
            if pos - offset != compress_size:
                continue

            return crc, compress_size, file_size, dd_size

        return None

    def _scan_data_descriptor_no_sig(self, fp, offset, end_offset, zip64, chunk_size=8192):
        dd_fmt = '<LQQ' if zip64 else '<LLL'
        dd_size = struct.calcsize(dd_fmt)

        pos = offset
        remainder = b''

        fp.seek(offset)
        while pos < end_offset:
            chunk = remainder + fp.read(min(chunk_size, end_offset - pos))

            delta = pos - len(remainder) - offset
            mv = memoryview(chunk)
            for i in range(len(chunk) - dd_size + 1):
                dd = mv[i:i + dd_size]
                try:
                    crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
                except struct.error:
                    continue
                if delta + i != compress_size:
                    continue

                return crc, compress_size, file_size, dd_size

            remainder = chunk[-(dd_size - 1):]
            pos += chunk_size

        return None

    def _scan_data_descriptor_no_sig_by_decompression(self, fp, offset, end_offset, zip64, method):
        try:
            decompressor = _get_decompressor(method)
        except RuntimeError:
            return False

        if decompressor is None:
            return False

        dd_fmt = '<LQQ' if zip64 else '<LLL'
        dd_size = struct.calcsize(dd_fmt)

        # early return and prevent potential `fp.read(-1)`
        if end_offset - dd_size < offset:
            return None

        try:
            pos = self._trace_compressed_block_end(fp, offset, end_offset - dd_size, decompressor)
        except Exception:
            return None

        fp.seek(pos)
        dd = fp.read(dd_size)
        try:
            crc, compress_size, file_size = struct.unpack(dd_fmt, dd)
        except struct.error:
            return None
        if pos - offset != compress_size:
            return None

        return crc, compress_size, file_size, dd_size

    def _trace_compressed_block_end(self, fp, offset, end_offset, decompressor,
                                    chunk_size=io.DEFAULT_BUFFER_SIZE):
        fp.seek(offset)
        read_size = 0
        while True:
            chunk = fp.read(min(chunk_size, end_offset - offset - read_size))
            if not chunk:
                raise EOFError('Unexpected EOF while decompressing')

            # may raise on error
            decompressor.decompress(chunk)

            read_size += len(chunk)

            if decompressor.eof:
                unused_len = len(decompressor.unused_data)
                return offset + read_size - unused_len

    def _calc_local_file_entry_size(self, fp, zinfo):
        fp.seek(zinfo.header_offset)
        fheader = self._read_local_file_header(fp)

        if zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
            zip64 = fheader[_FH_UNCOMPRESSED_SIZE] == 0xffffffff
            dd_fmt = '<LLQQ' if zip64 else '<LLLL'
            fp.seek(
                fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
                zinfo.compress_size,
                os.SEEK_CUR,
            )
            if fp.read(struct.calcsize('<L')) != struct.pack('<L', _DD_SIGNATURE):
                dd_fmt = '<LQQ' if zip64 else '<LLL'
            dd_size = struct.calcsize(dd_fmt)
        else:
            dd_size = 0

        return (
            sizeFileHeader +
            fheader[_FH_FILENAME_LENGTH] + fheader[_FH_EXTRA_FIELD_LENGTH] +
            zinfo.compress_size +
            dd_size
        )

    def _copy_bytes(self, fp, old_offset, new_offset, size):
        read_size = 0
        while read_size < size:
            fp.seek(old_offset + read_size)
            data = fp.read(min(size - read_size, self.chunk_size))
            fp.seek(new_offset + read_size)
            fp.write(data)
            fp.flush()
            read_size += len(data)


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

@@ -1866,6 +2328,72 @@ def extractall(self, path=None, members=None, pwd=None):
        for zipinfo in members:
            self._extract_member(zipinfo, path, pwd)

    def remove(self, zinfo_or_arcname):
        """Remove a member from the archive."""
        if self.mode not in ('w', 'x', 'a'):
            raise ValueError("remove() requires mode 'w', 'x', or 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists."
            )

        with self._lock:
            # get the zinfo
            if isinstance(zinfo_or_arcname, ZipInfo):
                zinfo = zinfo_or_arcname
            else:
                # raise KeyError if arcname does not exist
                zinfo = self.getinfo(zinfo_or_arcname)

            try:
                self.filelist.remove(zinfo)
            except ValueError:
                raise KeyError('There is no item %r in the archive' % zinfo) from None

            try:
                del self.NameToInfo[zinfo.filename]
            except KeyError:
                pass

            # Avoid a missing entry when another entry has the same name,
            # to prevent an error on `testzip()`.
            # Reverse the order as NameToInfo normally stores the last added one.
            for zi in reversed(self.filelist):
                if zi.filename == zinfo.filename:
                    self.NameToInfo.setdefault(zi.filename, zi)
                    break

            self._didModify = True

            return zinfo

    def repack(self, removed=None, **opts):
        """Repack a zip file, removing non-referenced file entries.

        The archive must be opened with mode 'a', as modes 'w'/'x' do not
        truncate the file when closed. This cannot simply be changed, as
        those modes may be used on an unseekable file buffer, which
        disallows truncation."""
        if self.mode != 'a':
            raise ValueError("repack() requires mode 'a'")
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        with self._lock:
            self._writing = True
            try:
                _ZipRepacker(**opts).repack(self, removed)
            finally:
                self._writing = False

    @classmethod
    def _sanitize_windows_name(cls, arcname, pathsep):
        """Replace bad characters and remove trailing dots from parts."""

@@ -0,0 +1 @@
Add :meth:`~zipfile.ZipFile.remove` and :meth:`~zipfile.ZipFile.repack` to :class:`~zipfile.ZipFile`.