From 8793b215253bf69cc699fab77b12d7f1313360d8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 7 Oct 2016 22:20:50 +0300 Subject: [PATCH] Issue #26293: Fixed writing ZIP files that starts not from the start of the file. Offsets in ZIP file now are relative to the start of the archive in conforming to the specification. --- Lib/test/test_zipfile.py | 43 ++++++++++++++++++++++++++++++++++++++++ Lib/zipfile.py | 30 ++++++++++++++-------------- Misc/NEWS | 4 ++++ 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index d278e06a453..d18a77017fa 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -415,6 +415,49 @@ def test_append_to_non_zip_file(self): f.seek(len(data)) with zipfile.ZipFile(f, "r") as zipfp: self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + with open(TESTFN2, 'rb') as f: + self.assertEqual(f.read(len(data)), data) + zipfiledata = f.read() + with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + + def test_read_concatenated_zip_file(self): + with io.BytesIO() as bio: + with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.write(TESTFN, TESTFN) + zipfiledata = bio.getvalue() + data = b'I am not a ZipFile!'*10 + with open(TESTFN2, 'wb') as f: + f.write(data) + f.write(zipfiledata) + + with zipfile.ZipFile(TESTFN2) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + + def test_append_to_concatenated_zip_file(self): + with io.BytesIO() as bio: + with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.write(TESTFN, TESTFN) + zipfiledata = bio.getvalue() + data = b'I am not a ZipFile!'*1000000 + with open(TESTFN2, 'wb') as f: + f.write(data) + f.write(zipfiledata) + + with zipfile.ZipFile(TESTFN2, 'a') as zipfp: + 
self.assertEqual(zipfp.namelist(), [TESTFN]) + zipfp.writestr('strfile', self.data) + + with open(TESTFN2, 'rb') as f: + self.assertEqual(f.read(len(data)), data) + zipfiledata = f.read() + with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile']) + self.assertEqual(zipfp.read(TESTFN), self.data) + self.assertEqual(zipfp.read('strfile'), self.data) def test_ignores_newline_at_end(self): with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp: diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 56a2479fb38..2476717c963 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -1029,10 +1029,10 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True): # even if no files are added to the archive self._didModify = True try: - self.start_dir = self.fp.tell() + self.start_dir = self._start_disk = self.fp.tell() except (AttributeError, OSError): self.fp = _Tellable(self.fp) - self.start_dir = 0 + self.start_dir = self._start_disk = 0 self._seekable = False else: # Some file-like objects can provide tell() but not seek() @@ -1053,7 +1053,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True): # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True - self.start_dir = self.fp.tell() + self.start_dir = self._start_disk = self.fp.tell() else: raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'") except: @@ -1097,17 +1097,18 @@ def _RealGetContents(self): offset_cd = endrec[_ECD_OFFSET] # offset of central directory self._comment = endrec[_ECD_COMMENT] # archive comment - # "concat" is zero, unless zip was concatenated to another file - concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + # self._start_disk: Position of the start of ZIP archive + # It is zero, unless ZIP was concatenated to another file + self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd if 
endrec[_ECD_SIGNATURE] == stringEndArchive64: # If Zip64 extension structures are present, account for them - concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator) if self.debug > 2: - inferred = concat + offset_cd - print("given, inferred, offset", offset_cd, inferred, concat) + inferred = self._start_disk + offset_cd + print("given, inferred, offset", offset_cd, inferred, self._start_disk) # self.start_dir: Position of start of central directory - self.start_dir = offset_cd + concat + self.start_dir = offset_cd + self._start_disk fp.seek(self.start_dir, 0) data = fp.read(size_cd) fp = io.BytesIO(data) @@ -1147,7 +1148,7 @@ def _RealGetContents(self): t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) x._decodeExtra() - x.header_offset = x.header_offset + concat + x.header_offset = x.header_offset + self._start_disk self.filelist.append(x) self.NameToInfo[x.filename] = x @@ -1627,11 +1628,10 @@ def _write_end_record(self): file_size = zinfo.file_size compress_size = zinfo.compress_size - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) + header_offset = zinfo.header_offset - self._start_disk + if header_offset > ZIP64_LIMIT: + extra.append(header_offset) header_offset = 0xffffffff - else: - header_offset = zinfo.header_offset extra_data = zinfo.extra min_version = 0 @@ -1678,7 +1678,7 @@ def _write_end_record(self): # Write end-of-zip-archive record centDirCount = len(self.filelist) centDirSize = pos2 - self.start_dir - centDirOffset = self.start_dir + centDirOffset = self.start_dir - self._start_disk requires_zip64 = None if centDirCount > ZIP_FILECOUNT_LIMIT: requires_zip64 = "Files count" diff --git a/Misc/NEWS b/Misc/NEWS index 369aa8b03f6..36a7f30a436 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -92,6 +92,10 @@ Core and Builtins Library ------- +- Issue #26293: Fixed writing ZIP files that starts not from the start of the + file. 
Offsets in the ZIP file are now relative to the start of the archive, + in conformance with the specification. + - Issue #28321: Fixed writing non-BMP characters with binary format in plistlib. - Issue #28322: Fixed possible crashes when unpickle itertools objects from