mirror of
https://github.com/python/cpython.git
synced 2026-06-27 19:36:07 +00:00
[3.13] gh-84353: Preserve non-UTF-8 filenames when appending to ZipFile (GH-150091) (GH-150529)
Preserve non-UTF-8 filenames when appending to a ZipFile.
---------
(cherry picked from commit 24c6bbc92b)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
parent
68840510e3
commit
dad9bec4fa
3 changed files with 35 additions and 18 deletions
|
|
@@ -3521,29 +3521,23 @@ def test_read_with_unsuitable_metadata_encoding(self):
|
|||
|
||||
def test_read_after_append(self):
|
||||
newname = '\u56db' # Han 'four'
|
||||
expected_names = [name.encode('shift_jis').decode('cp437')
|
||||
for name in self.file_names[:2]] + self.file_names[2:]
|
||||
expected_names.append(newname)
|
||||
expected_content = (*self.file_content, b"newcontent")
|
||||
newname2 = 'fünf' # representable in cp437, but still stored as UTF-8
|
||||
expected_names = [*self.file_names, newname, newname2]
|
||||
mojibake_expected_names = [name.encode('shift_jis').decode('cp437')
|
||||
if i < 2 else name
|
||||
for i, name in enumerate(expected_names)]
|
||||
expected_content = (*self.file_content, b"newcontent", b"newcontent2")
|
||||
|
||||
with zipfile.ZipFile(TESTFN, "a") as zipfp:
|
||||
zipfp.writestr(newname, "newcontent")
|
||||
self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))
|
||||
zipfp.writestr(newname2, "newcontent2")
|
||||
self.assertEqual(sorted(zipfp.namelist()), sorted(mojibake_expected_names))
|
||||
|
||||
with zipfile.ZipFile(TESTFN, "r") as zipfp:
|
||||
self._test_read(zipfp, expected_names, expected_content)
|
||||
self._test_read(zipfp, mojibake_expected_names, expected_content)
|
||||
|
||||
with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
|
||||
self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))
|
||||
for i, (name, content) in enumerate(zip(expected_names, expected_content)):
|
||||
info = zipfp.getinfo(name)
|
||||
self.assertEqual(info.filename, name)
|
||||
self.assertEqual(info.file_size, len(content))
|
||||
if i < 2:
|
||||
with self.assertRaises(zipfile.BadZipFile):
|
||||
zipfp.read(name)
|
||||
else:
|
||||
self.assertEqual(zipfp.read(name), content)
|
||||
self._test_read(zipfp, expected_names, expected_content)
|
||||
|
||||
def test_write_with_metadata_encoding(self):
|
||||
ZF = zipfile.ZipFile
|
||||
|
|
@@ -3552,6 +3546,20 @@ def test_write_with_metadata_encoding(self):
|
|||
"^metadata_encoding is only"):
|
||||
ZF("nonesuch.zip", mode, metadata_encoding="shift_jis")
|
||||
|
||||
def test_add_comment(self):
|
||||
with zipfile.ZipFile(TESTFN, "r") as zipfp:
|
||||
mojibake_expected_names = zipfp.namelist()
|
||||
|
||||
with zipfile.ZipFile(TESTFN, "a") as zipfp:
|
||||
zipfp.comment = b'comment'
|
||||
self.assertEqual(zipfp.namelist(), mojibake_expected_names)
|
||||
|
||||
with zipfile.ZipFile(TESTFN, "r") as zipfp:
|
||||
self._test_read(zipfp, mojibake_expected_names, self.file_content)
|
||||
|
||||
with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
|
||||
self._test_read(zipfp, self.file_names, self.file_content)
|
||||
|
||||
def test_cli_with_metadata_encoding(self):
|
||||
errmsg = "Non-conforming encodings not supported with -c."
|
||||
args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"]
|
||||
|
|
|
|||
|
|
@@ -531,8 +531,12 @@ def FileHeader(self, zip64=None):
|
|||
return header + filename + extra
|
||||
|
||||
def _encodeFilenameFlags(self):
|
||||
if self.flag_bits & _MASK_UTF_FILENAME:
|
||||
encoding = 'ascii'
|
||||
else:
|
||||
encoding = 'cp437'
|
||||
try:
|
||||
return self.filename.encode('ascii'), self.flag_bits
|
||||
return self.filename.encode(encoding), self.flag_bits & ~_MASK_UTF_FILENAME
|
||||
except UnicodeEncodeError:
|
||||
return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
|
||||
|
||||
|
|
@@ -1742,7 +1746,7 @@ def _open_to_write(self, zinfo, force_zip64=False):
|
|||
zinfo.compress_size = 0
|
||||
zinfo.CRC = 0
|
||||
|
||||
zinfo.flag_bits = 0x00
|
||||
zinfo.flag_bits = _MASK_UTF_FILENAME
|
||||
if zinfo.compress_type == ZIP_LZMA:
|
||||
# Compressed data includes an end-of-stream (EOS) marker
|
||||
zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,5 @@
|
|||
Preserve non-UTF-8 encoded filenames when appending to a
|
||||
:class:`zipfile.ZipFile`. Previously, non-ASCII names stored in a legacy
|
||||
encoding (without the UTF-8 flag bit set) could be corrupted when the
|
||||
central directory was rewritten: they were decoded as cp437 and then
|
||||
re-stored as UTF-8.
|
||||
Loading…
Add table
Add a link
Reference in a new issue