mirror of
https://github.com/golang/go.git
synced 2026-06-27 03:11:23 +00:00
archive/zip: fix writer-side Zip64 edge cases
I ran into this because the broken Writer caused mysterious and very
hard to debug failures uploading archive/zip-generated files to the
Internet Archive. (Only zip files bigger than 4GiB *and* smaller than
around 6.5GiB failed. I still don't have an explanation for the latter
part, maybe the parser has different logic for when the count of records
crosses 65535 and the Zip64 EOCD is used.)
Reproducing testdata/zip64/*.zsparse:
Inputs (sparse zero files via `truncate -s N NAME`, sizes in bytes):
big5g.bin 5<<30 big4g.bin 4<<30
big4g-1.bin (4<<30) - 1 big4g-2.bin (4<<30) - 2
under4g.bin (4<<30) - 59 first (4<<30) - 36
small.bin 42 (use `dd` for the non-sparse 42-byte file)
Cases (case → entries, M=0 Store, M=9 Deflate):
store-5g big5g.bin/0
deflate-zeros-5g big5g.bin/9
store-4g-minus-1 big4g-1.bin/0
store-4g-minus-2 big4g-2.bin/0
store-just-under-4g under4g.bin/0
store-exact-4g big4g.bin/0
offset-past-4g big5g.bin/0, small.bin/0
offset-eq-4g first/0, small.bin/0
Producers:
infozip-* Info-ZIP 3.0:
zip -q -X -M OUT.zip <entries>
libarchive-* bsdtar (libarchive):
bsdtar -cf OUT.zip --format zip \
--options zip:compression={store|deflate} <entries>
go126-* archive/zip from Go 1.26. Build with GOTOOLCHAIN=go1.26.0
from a tempdir whose go.mod declares `go 1.26.0`.
For each entry:
zip.FileHeader{Name, Method: zip.Store|zip.Deflate},
CreateHeader, io.CopyN(fw, zeros, size), w.Close().
Convert each OUT.zip to ${producer}-${case}.zsparse using the format
defined in archive/zip/zip64_sparse_test.go (scanSparse / readSparse):
walk the zip in 4 KiB chunks, drop chunks that are entirely zero,
coalesce adjacent non-zero chunks into spans, and serialize the result
as gzip of:
uint64 LE totalSize
uint32 LE numSpans
numSpans times:
uint64 LE offset
uint32 LE dataLen
dataLen bytes
Updates #22520
Fixes #23572
Fixes #33116
Fixes #69415
Change-Id: I6e24e7170094346af494da153c63e6b56a6a6964
Reviewed-on: https://go-review.googlesource.com/c/go/+/725161
Auto-Submit: Filippo Valsorda <filippo@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
SLSA-Policy-Verified: SLSA Policy Verification Service <devtools-gerritcodereview-exitgate@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
This commit is contained in:
parent
a7ea4a7ecd
commit
3a9c8e1d90
28 changed files with 975 additions and 66 deletions
|
|
@ -156,7 +156,11 @@ type FileHeader struct {
|
|||
// UncompressedSize64 is the uncompressed size of the file in bytes.
|
||||
UncompressedSize64 uint64
|
||||
|
||||
Extra []byte
|
||||
// Extra are the extensible data fields. The writer automatically includes
|
||||
// the appropriate Zip64 field if necessary, and [Writer.Close] appends the
|
||||
// Central Directory version of the Zip64 field to Extra.
|
||||
Extra []byte
|
||||
|
||||
ExternalAttrs uint32 // Meaning depends on CreatorVersion
|
||||
}
|
||||
|
||||
|
|
@ -337,11 +341,6 @@ func (h *FileHeader) SetMode(mode fs.FileMode) {
|
|||
}
|
||||
}
|
||||
|
||||
// isZip64 reports whether the file size exceeds the 32 bit limit
|
||||
func (h *FileHeader) isZip64() bool {
|
||||
return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
|
||||
}
|
||||
|
||||
func (h *FileHeader) hasDataDescriptor() bool {
|
||||
return h.Flags&0x8 != 0
|
||||
}
|
||||
|
|
|
|||
BIN
src/archive/zip/testdata/zip64/go126-deflate-zeros-5g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-deflate-zeros-5g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-offset-eq-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-offset-eq-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-offset-past-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-offset-past-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-store-4g-minus-1.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-store-4g-minus-1.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-store-4g-minus-2.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-store-4g-minus-2.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-store-5g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-store-5g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-store-exact-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-store-exact-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/go126-store-just-under-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/go126-store-just-under-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-deflate-zeros-5g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-deflate-zeros-5g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-offset-eq-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-offset-eq-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-offset-past-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-offset-past-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-store-4g-minus-1.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-store-4g-minus-1.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-store-4g-minus-2.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-store-4g-minus-2.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-store-5g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-store-5g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-store-exact-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-store-exact-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/infozip-store-just-under-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/infozip-store-just-under-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-deflate-zeros-5g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-deflate-zeros-5g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-offset-eq-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-offset-eq-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-offset-past-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-offset-past-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-store-4g-minus-1.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-store-4g-minus-1.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-store-4g-minus-2.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-store-4g-minus-2.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-store-5g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-store-5g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-store-exact-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-store-exact-4g.zsparse
vendored
Normal file
Binary file not shown.
BIN
src/archive/zip/testdata/zip64/libarchive-store-just-under-4g.zsparse
vendored
Normal file
BIN
src/archive/zip/testdata/zip64/libarchive-store-just-under-4g.zsparse
vendored
Normal file
Binary file not shown.
|
|
@ -93,48 +93,65 @@ func (w *Writer) Close() error {
|
|||
|
||||
// write central directory
|
||||
start := w.cw.count
|
||||
usedZip64 := false
|
||||
for _, h := range w.dir {
|
||||
// For the Central Directory, we always have the correct sizes.
|
||||
//
|
||||
// Implementations disagree on what triggers the inclusion of a Zip64
|
||||
// extra field: Info-ZIP only writes it if any size or offset EXCEEDS
|
||||
// 4GiB - 1, while libarchive writes it if any size REACHES OR EXCEEDS
|
||||
// 4GiB - 1, or if the offset EXCEEDS 4GiB - 1. The spec is ambiguous.
|
||||
//
|
||||
// We conservatively write Zip64 extra fields if any size or offset
|
||||
// REACHES OR EXCEEDS 4GiB - 1, to maximize compatibility with readers.
|
||||
// There is no ambiguity in parsing, so there is no downside to it.
|
||||
//
|
||||
// The spec is clear though that all and only the fields that REACH OR
|
||||
// EXCEED 4GiB - 1 are included in the Zip64 extra, once it's present.
|
||||
readerVersion := h.ReaderVersion
|
||||
if h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max || h.offset >= uint32max {
|
||||
usedZip64 = true
|
||||
readerVersion = max(readerVersion, zipVersion45)
|
||||
var size uint16
|
||||
var buf [28]byte // 2x uint16 + up to 3x uint64
|
||||
eb := writeBuf(buf[:])
|
||||
eb.uint16(zip64ExtraID)
|
||||
eb.uint16(0) // size to be filled out later
|
||||
if h.UncompressedSize64 >= uint32max {
|
||||
eb.uint64(h.UncompressedSize64)
|
||||
size += 8
|
||||
}
|
||||
if h.CompressedSize64 >= uint32max {
|
||||
eb.uint64(h.CompressedSize64)
|
||||
size += 8
|
||||
}
|
||||
if h.offset >= uint32max {
|
||||
eb.uint64(h.offset)
|
||||
size += 8
|
||||
}
|
||||
sb := writeBuf(buf[2:])
|
||||
sb.uint16(size)
|
||||
h.Extra = append(h.Extra, buf[:4+size]...)
|
||||
}
|
||||
|
||||
var buf [directoryHeaderLen]byte
|
||||
b := writeBuf(buf[:])
|
||||
b.uint32(uint32(directoryHeaderSignature))
|
||||
b.uint16(h.CreatorVersion)
|
||||
b.uint16(h.ReaderVersion)
|
||||
b.uint16(readerVersion)
|
||||
b.uint16(h.Flags)
|
||||
b.uint16(h.Method)
|
||||
b.uint16(h.ModifiedTime)
|
||||
b.uint16(h.ModifiedDate)
|
||||
b.uint32(h.CRC32)
|
||||
if h.isZip64() || h.offset >= uint32max {
|
||||
// the file needs a zip64 header. store maxint in both
|
||||
// 32 bit size fields (and offset later) to signal that the
|
||||
// zip64 extra header should be used.
|
||||
b.uint32(uint32max) // compressed size
|
||||
b.uint32(uint32max) // uncompressed size
|
||||
|
||||
// append a zip64 extra block to Extra
|
||||
var buf [28]byte // 2x uint16 + 3x uint64
|
||||
eb := writeBuf(buf[:])
|
||||
eb.uint16(zip64ExtraID)
|
||||
eb.uint16(24) // size = 3x uint64
|
||||
eb.uint64(h.UncompressedSize64)
|
||||
eb.uint64(h.CompressedSize64)
|
||||
eb.uint64(h.offset)
|
||||
h.Extra = append(h.Extra, buf[:]...)
|
||||
} else {
|
||||
b.uint32(h.CompressedSize)
|
||||
b.uint32(h.UncompressedSize)
|
||||
}
|
||||
|
||||
b.uint32(uint32(min(h.CompressedSize64, uint32max)))
|
||||
b.uint32(uint32(min(h.UncompressedSize64, uint32max)))
|
||||
b.uint16(uint16(len(h.Name)))
|
||||
b.uint16(uint16(len(h.Extra)))
|
||||
b.uint16(uint16(len(h.Comment)))
|
||||
b = b[4:] // skip disk number start and internal file attr (2x uint16)
|
||||
b.uint32(h.ExternalAttrs)
|
||||
if h.offset > uint32max {
|
||||
b.uint32(uint32max)
|
||||
} else {
|
||||
b.uint32(uint32(h.offset))
|
||||
}
|
||||
b.uint32(uint32(min(h.offset, uint32max)))
|
||||
if _, err := w.cw.Write(buf[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -158,7 +175,11 @@ func (w *Writer) Close() error {
|
|||
f(size, offset)
|
||||
}
|
||||
|
||||
if records >= uint16max || size >= uint32max || offset >= uint32max {
|
||||
// Emit the Zip64 EOCD records whenever any individual entry needed a Zip64
|
||||
// extra field, even if the EOCD's own fields fit in 32 bits, matching
|
||||
// Info-ZIP (but not libarchive). See APPNOTE 4.3.9.2: "when Zip64
|
||||
// extensions are in use, the EOCD64 record must be present."
|
||||
if usedZip64 || records >= uint16max || size >= uint32max || offset >= uint32max {
|
||||
var buf [directory64EndLen + directory64LocLen]byte
|
||||
b := writeBuf(buf[:])
|
||||
|
||||
|
|
@ -183,24 +204,18 @@ func (w *Writer) Close() error {
|
|||
if _, err := w.cw.Write(buf[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// store max values in the regular end record to signal
|
||||
// that the zip64 values should be used instead
|
||||
records = uint16max
|
||||
size = uint32max
|
||||
offset = uint32max
|
||||
}
|
||||
|
||||
// write end record
|
||||
var buf [directoryEndLen]byte
|
||||
b := writeBuf(buf[:])
|
||||
b.uint32(uint32(directoryEndSignature))
|
||||
b = b[4:] // skip over disk number and first disk number (2x uint16)
|
||||
b.uint16(uint16(records)) // number of entries this disk
|
||||
b.uint16(uint16(records)) // number of entries total
|
||||
b.uint32(uint32(size)) // size of directory
|
||||
b.uint32(uint32(offset)) // start of directory
|
||||
b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
|
||||
b = b[4:] // skip over disk number and first disk number (2x uint16)
|
||||
b.uint16(uint16(min(uint16max, records))) // number of entries this disk
|
||||
b.uint16(uint16(min(uint16max, records))) // number of entries total
|
||||
b.uint32(uint32(min(uint32max, size))) // size of directory
|
||||
b.uint32(uint32(min(uint32max, offset))) // start of directory
|
||||
b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
|
||||
if _, err := w.cw.Write(buf[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -398,38 +413,79 @@ func writeHeader(w io.Writer, h *header) error {
|
|||
return errLongExtra
|
||||
}
|
||||
|
||||
// The correct behavior of a streaming writer, implemented by Info-ZIP 3.0,
|
||||
// would be to write 0xFFFFFFFF in the size fields and then write a Zip64
|
||||
// extra field with the sizes at zero (to signal they are stored in a ZIP64
|
||||
// data descriptor, in case the file is > 4GiB).
|
||||
//
|
||||
// We don't do that, and instead write zeroes directly in the size fields,
|
||||
// because that wastes 28 bytes for every file smaller than 4GiB, and
|
||||
// because it would change the encoding of nearly every zip file created by
|
||||
// archive/zip. (No one should rely on it being stable, but still.)
|
||||
//
|
||||
// Anyway, the Local File Header is not that important, as the Central
|
||||
// Directory is authoritative, and there we always write the correct sizes.
|
||||
//
|
||||
// If we do know the sizes, because [Writer.CreateRaw] is used and the data
|
||||
// descriptor flag is not set, then we write them to the header. If either
|
||||
// size reaches 4GiB, we write 0xFFFFFFFF placeholders and a Zip64 extra
|
||||
// field with BOTH sizes, per the spec and matching Info-ZIP. Note this is
|
||||
// different from the Central Directory Zip64 extra field logic, somehow.
|
||||
//
|
||||
// (One final interesting case that doesn't apply to us: if the input is
|
||||
// streaming but the output is seekable, Info-ZIP always writes Zip64 extra
|
||||
// fields, and then goes back and patches in the sizes, even for files < 4GiB.)
|
||||
|
||||
var zip64ExtraInfo []byte
|
||||
readerVersion := h.ReaderVersion
|
||||
noDataDescriptor := h.raw && !h.hasDataDescriptor()
|
||||
if noDataDescriptor && (h.CompressedSize64 > uint32max || h.UncompressedSize64 > uint32max) {
|
||||
readerVersion = max(readerVersion, zipVersion45)
|
||||
zip64ExtraInfo = make([]byte, 20) // 2x uint16 + 2x uint64
|
||||
b := writeBuf(zip64ExtraInfo)
|
||||
b.uint16(zip64ExtraID)
|
||||
b.uint16(16) // size of Zip64 extra field data
|
||||
b.uint64(h.UncompressedSize64)
|
||||
b.uint64(h.CompressedSize64)
|
||||
}
|
||||
|
||||
var buf [fileHeaderLen]byte
|
||||
b := writeBuf(buf[:])
|
||||
b.uint32(uint32(fileHeaderSignature))
|
||||
b.uint16(h.ReaderVersion)
|
||||
b.uint16(readerVersion)
|
||||
b.uint16(h.Flags)
|
||||
b.uint16(h.Method)
|
||||
b.uint16(h.ModifiedTime)
|
||||
b.uint16(h.ModifiedDate)
|
||||
// In raw mode (caller does the compression), the values are either
|
||||
// written here or in the trailing data descriptor based on the header
|
||||
// flags.
|
||||
if h.raw && !h.hasDataDescriptor() {
|
||||
if noDataDescriptor {
|
||||
b.uint32(h.CRC32)
|
||||
b.uint32(uint32(min(h.CompressedSize64, uint32max)))
|
||||
b.uint32(uint32(min(h.UncompressedSize64, uint32max)))
|
||||
if zip64ExtraInfo != nil {
|
||||
b.uint32(uint32max)
|
||||
b.uint32(uint32max)
|
||||
} else {
|
||||
b.uint32(uint32(h.CompressedSize64))
|
||||
b.uint32(uint32(h.UncompressedSize64))
|
||||
}
|
||||
} else {
|
||||
// When this package handles the compression, these values are
|
||||
// always written to the trailing data descriptor.
|
||||
b.uint32(0) // crc32
|
||||
b.uint32(0) // compressed size
|
||||
b.uint32(0) // uncompressed size
|
||||
}
|
||||
b.uint16(uint16(len(h.Name)))
|
||||
b.uint16(uint16(len(h.Extra)))
|
||||
b.uint16(uint16(len(h.Extra) + len(zip64ExtraInfo)))
|
||||
if _, err := w.Write(buf[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := io.WriteString(w, h.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := w.Write(h.Extra)
|
||||
return err
|
||||
if _, err := w.Write(h.Extra); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.Write(zip64ExtraInfo); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateRaw adds a file to the zip archive using the provided [FileHeader] and
|
||||
|
|
@ -601,7 +657,7 @@ func (w *fileWriter) close() error {
|
|||
fh.CompressedSize64 = uint64(w.compCount.count)
|
||||
fh.UncompressedSize64 = uint64(w.rawCount.count)
|
||||
|
||||
if fh.isZip64() {
|
||||
if w.CompressedSize64 > uint32max || w.UncompressedSize64 > uint32max {
|
||||
fh.CompressedSize = uint32max
|
||||
fh.UncompressedSize = uint32max
|
||||
fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
|
||||
|
|
@ -617,13 +673,13 @@ func (w *fileWriter) writeDataDescriptor() error {
|
|||
if !w.hasDataDescriptor() {
|
||||
return nil
|
||||
}
|
||||
// Write data descriptor. This is more complicated than one would
|
||||
// think, see e.g. comments in zipfile.c:putextended() and
|
||||
// https://bugs.openjdk.org/browse/JDK-7073588.
|
||||
// The approach here is to write 8 byte sizes if needed without
|
||||
// adding a zip64 extra in the local header (too late anyway).
|
||||
// See the comment in [writeHeader] about how and why we don't signal ZIP64
|
||||
// mode in the local file header. If one of the sizes turns out to exceed
|
||||
// 4GiB, we use the 64-bit sizes anyway, for lack of alternatives.
|
||||
//
|
||||
// See also https://bugs.openjdk.org/browse/JDK-7073588.
|
||||
var buf []byte
|
||||
if w.isZip64() {
|
||||
if w.CompressedSize64 > uint32max || w.UncompressedSize64 > uint32max {
|
||||
buf = make([]byte, dataDescriptor64Len)
|
||||
} else {
|
||||
buf = make([]byte, dataDescriptorLen)
|
||||
|
|
@ -631,7 +687,7 @@ func (w *fileWriter) writeDataDescriptor() error {
|
|||
b := writeBuf(buf)
|
||||
b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
|
||||
b.uint32(w.CRC32)
|
||||
if w.isZip64() {
|
||||
if w.CompressedSize64 > uint32max || w.UncompressedSize64 > uint32max {
|
||||
b.uint64(w.CompressedSize64)
|
||||
b.uint64(w.UncompressedSize64)
|
||||
} else {
|
||||
|
|
|
|||
240
src/archive/zip/zip64_sparse_test.go
Normal file
240
src/archive/zip/zip64_sparse_test.go
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package zip
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"compress/gzip"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"slices"
|
||||
)
|
||||
|
||||
type (
	// A sparseFile models an archive as its total length plus the byte
	// ranges that are not all zero (the LFH headers, the Central Directory,
	// the EOCD records, and any non-zero compressed bodies). Every byte that
	// is not covered by a span reads as zero. It is both the storage format
	// of the goldens under testdata/zip64/ (suffix .zsparse) and the
	// in-memory shape produced by the writer-reproduction harness.
	//
	// On-disk layout (all little-endian):
	//
	//	uint64 size
	//	uint32 numSpans
	//	for each span:
	//	    uint64 offset
	//	    uint32 dataLen
	//	    dataLen bytes
	//
	// Spans are sorted by offset and non-overlapping.
	sparseFile struct {
		Size  int64        // length of the conceptual file in bytes
		Spans []sparseSpan // retained byte ranges
	}

	// A sparseSpan is one retained byte range of a [sparseFile].
	sparseSpan struct {
		Offset int64  // position of Data[0] in the conceptual file
		Data   []byte // bytes stored at Offset
	}
)

// ReadAt implements [io.ReaderAt]. Bytes covered by a span are copied from
// it; every other byte inside [0, Size) reads as zero.
//
// A negative off is an error, and a read starting at or past Size returns
// (0, io.EOF). A read that runs past Size returns the bytes available
// together with io.EOF.
func (f *sparseFile) ReadAt(p []byte, off int64) (int, error) {
	switch {
	case off < 0:
		return 0, errors.New("sparseFile: negative offset")
	case off >= f.Size:
		return 0, io.EOF
	}

	// Clip the request to the end of the file and start from all zeros, so
	// only the spans that intersect [off, limit) need to be copied in.
	limit := min(f.Size, off+int64(len(p)))
	dst := p[:limit-off]
	clear(dst)

	for i := range f.Spans {
		span := &f.Spans[i]
		lo := span.Offset
		hi := lo + int64(len(span.Data))
		if hi > off && lo < limit {
			from, to := max(lo, off), min(hi, limit)
			copy(p[from-off:to-off], span.Data[from-lo:to-lo])
		}
	}

	if len(dst) < len(p) {
		return len(dst), io.EOF
	}
	return len(dst), nil
}

// materializeTail returns the final keep bytes of the conceptual file as an
// ordinary byte slice, suitable for [parseCD], along with the file offset of
// the first returned byte. If keep exceeds the file size, the whole file is
// returned and baseOff is 0.
func (f *sparseFile) materializeTail(keep int64) (data []byte, baseOff uint64) {
	keep = min(keep, f.Size)
	start := f.Size - keep
	data = make([]byte, keep)
	// The result of ReadAt is deliberately not inspected: for
	// 0 < keep <= Size the requested range lies entirely inside the file.
	f.ReadAt(data, start)
	return data, uint64(start)
}
|
||||
|
||||
const sparseChunk = 4096
|
||||
|
||||
// scanSparse stream-reads r and builds a sparseFile, treating any contiguous
|
||||
// run of zero bytes (rounded to sparseChunk boundaries) as a gap. Adjacent
|
||||
// non-zero chunks are coalesced into one span.
|
||||
func scanSparse(r io.Reader) (*sparseFile, error) {
|
||||
f := &sparseFile{}
|
||||
var cur *sparseSpan
|
||||
buf := make([]byte, sparseChunk)
|
||||
for {
|
||||
n, err := io.ReadFull(r, buf)
|
||||
if n > 0 {
|
||||
chunk := buf[:n]
|
||||
if isAllZero(chunk) {
|
||||
if cur != nil {
|
||||
f.Spans = append(f.Spans, *cur)
|
||||
cur = nil
|
||||
}
|
||||
} else {
|
||||
if cur == nil {
|
||||
cur = &sparseSpan{Offset: f.Size}
|
||||
}
|
||||
cur.Data = append(cur.Data, chunk...)
|
||||
}
|
||||
f.Size += int64(n)
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if cur != nil {
|
||||
f.Spans = append(f.Spans, *cur)
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// writeSparse serializes f to w in the on-disk format described on
|
||||
// [sparseFile].
|
||||
func writeSparse(w io.Writer, f *sparseFile) error {
|
||||
var hdr [12]byte
|
||||
le.PutUint64(hdr[:8], uint64(f.Size))
|
||||
le.PutUint32(hdr[8:12], uint32(len(f.Spans)))
|
||||
if _, err := w.Write(hdr[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, s := range f.Spans {
|
||||
var b [12]byte
|
||||
le.PutUint64(b[:8], uint64(s.Offset))
|
||||
le.PutUint32(b[8:12], uint32(len(s.Data)))
|
||||
if _, err := w.Write(b[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.Write(s.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readSparse parses the on-disk format from r.
|
||||
func readSparse(r io.Reader) (*sparseFile, error) {
|
||||
var hdr [12]byte
|
||||
if _, err := io.ReadFull(r, hdr[:]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f := &sparseFile{
|
||||
Size: int64(le.Uint64(hdr[:8])),
|
||||
}
|
||||
n := le.Uint32(hdr[8:12])
|
||||
if n > 1<<20 {
|
||||
return nil, fmt.Errorf("sparseFile: implausible span count %d", n)
|
||||
}
|
||||
f.Spans = make([]sparseSpan, n)
|
||||
for i := range f.Spans {
|
||||
var b [12]byte
|
||||
if _, err := io.ReadFull(r, b[:]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f.Spans[i].Offset = int64(le.Uint64(b[:8]))
|
||||
sz := le.Uint32(b[8:12])
|
||||
f.Spans[i].Data = make([]byte, sz)
|
||||
if _, err := io.ReadFull(r, f.Spans[i].Data); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if !slices.IsSortedFunc(f.Spans, func(a, b sparseSpan) int {
|
||||
return cmp.Compare(a.Offset, b.Offset)
|
||||
}) {
|
||||
return nil, errors.New("sparseFile: spans not sorted")
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// readSparseFile reads a sparse file from path. The file is expected to be
|
||||
// gzip-compressed; the outer gzip wrap shrinks goldens that contain non-zero
|
||||
// compressed bodies (e.g., the deflate-zeros entries) by 100x because
|
||||
// deflate-of-zeros is highly repetitive. Small Store goldens benefit too:
|
||||
// gzip's header overhead is ~30 bytes, well under the bytes saved on a 4 KB
|
||||
// sparse representation.
|
||||
func readSparseFile(path string) (*sparseFile, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
zr, err := gzip.NewReader(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer zr.Close()
|
||||
return readSparse(zr)
|
||||
}
|
||||
|
||||
// isAllZero reports whether b contains no non-zero byte. An empty or nil
// slice is considered all zero.
func isAllZero(b []byte) bool {
	for i := 0; i < len(b); i++ {
		if b[i] != 0 {
			// One non-zero byte settles the answer.
			return false
		}
	}
	return true
}
|
||||
|
||||
// sparseBuffer accumulates writes into a [sparseFile], dropping any
|
||||
// chunkSize-byte chunk that is all-zero. This makes capturing the result
|
||||
// of pushing multi-GiB streams of zeros through the writer almost free —
|
||||
// the only bytes that end up retained are the LFHs, the Central
|
||||
// Directory, the EOCD records, and any non-zero compressed body.
|
||||
type sparseBuffer struct {
|
||||
f sparseFile
|
||||
cur *sparseSpan
|
||||
}
|
||||
|
||||
func (t *sparseBuffer) Write(p []byte) (int, error) {
|
||||
n := len(p)
|
||||
for len(p) > 0 {
|
||||
k := len(p)
|
||||
if k > sparseChunk {
|
||||
k = sparseChunk
|
||||
}
|
||||
chunk := p[:k]
|
||||
if isAllZero(chunk) {
|
||||
t.cur = nil
|
||||
} else {
|
||||
if t.cur == nil {
|
||||
t.f.Spans = append(t.f.Spans, sparseSpan{Offset: t.f.Size})
|
||||
t.cur = &t.f.Spans[len(t.f.Spans)-1]
|
||||
}
|
||||
t.cur.Data = append(t.cur.Data, chunk...)
|
||||
}
|
||||
t.f.Size += int64(k)
|
||||
p = p[k:]
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
614
src/archive/zip/zip64_test.go
Normal file
614
src/archive/zip/zip64_test.go
Normal file
|
|
@ -0,0 +1,614 @@
|
|||
// Copyright 2026 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package zip
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestZip64WriterCDGoldens checks that the archive/zip Writer emits a Central
|
||||
// Directory that matches the Zip64 conventions used by Info-ZIP, libarchive,
|
||||
// and the pre-CL archive/zip writer (go126-*), for archives at or above 4 GiB,
|
||||
// except where we intentionally diverged.
|
||||
//
|
||||
// For each golden in testdata/zip64/*.zsparse (see [sparseFile] for the
|
||||
// committed format), the test:
|
||||
// 1. Parses the golden's CD into a producer-independent snapshot — which
|
||||
// fields hold 0xFFFFFFFF placeholders, which Zip64 extra sub-fields are
|
||||
// present and in what order, and the EOCD/EOCD64 values.
|
||||
// 2. Verifies the production [NewReader] parses the same archive.
|
||||
// 3. Replays the same entries through a fresh [Writer] into a [sparseBuffer]
|
||||
// and parses our own CD.
|
||||
// 4. Verifies the production [NewReader] parses our reproduced archive too.
|
||||
// 5. Compares the two snapshots field-by-field, ignoring producer-specific
|
||||
// details (creator version, external attrs, non-Zip64 extras, absolute
|
||||
// byte offsets that depend on LFH/data-descriptor layout).
|
||||
func TestZip64WriterCDGoldens(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping in short mode; each golden replays a multi-GiB write")
|
||||
}
|
||||
|
||||
matches, err := filepath.Glob("testdata/zip64/*.zsparse")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(matches) == 0 {
|
||||
t.Fatal("missing Zip64 goldens in testdata/zip64")
|
||||
}
|
||||
|
||||
// Tail materialized for parseCD. Goldens have ≤ 2 entries; their CD
|
||||
// plus EOCD records fits in well under 1 MiB.
|
||||
const tailKeep = 1 << 20
|
||||
|
||||
// archive/zip's writer takes the most defensive position on every
|
||||
// spec-fuzzy point: it always emits the Zip64 extra at the 0xFFFFFFFF
|
||||
// boundary (matching libarchive but more conservative than Info-ZIP) AND
|
||||
// emits EOCD64 whenever any entry has a Zip64 extra in its CD record
|
||||
// (matching Info-ZIP but more conservative than libarchive). The go126-
|
||||
// goldens are output of an older archive/zip writer, and the format
|
||||
// deliberately diverges; they are kept here so the reader-side check
|
||||
// enforces backwards compatibility with archives produced by our own past
|
||||
// writer, and to ensure we only diverge where intended.
|
||||
expectedDiff := map[string]bool{
|
||||
// Info-ZIP treats a CD size field of exactly 0xFFFFFFFF as a real
|
||||
// value and omits the Zip64 extra; archive/zip defensively emits
|
||||
// the Zip64 extra with USize64+CSize64.
|
||||
"infozip-store-4g-minus-1": true,
|
||||
|
||||
// Info-ZIP treats a CD offset field of exactly 0xFFFFFFFF as a real
|
||||
// value and omits the Zip64 extra for offset; archive/zip defensively
|
||||
// emits the Zip64 extra with the offset sub-field.
|
||||
"infozip-offset-eq-4g": true,
|
||||
|
||||
// libarchive's writer emits EOCD64 only on EOCD-level overflow (CD
|
||||
// size/offset > 4GiB, records > 0xFFFF); archive/zip also emits
|
||||
// EOCD64 when any per-entry CD record uses a Zip64 extra, even if
|
||||
// the EOCD fields fit in 32 bits.
|
||||
"libarchive-deflate-zeros-5g": true,
|
||||
|
||||
// libarchive's LFH always carries a UT timestamp extra (~9 bytes),
|
||||
// so its dirOffset for a body of 4GiB-59 lands just past 0xFFFFFFFF
|
||||
// and it emits EOCD64. archive/zip's streaming LFH has no such
|
||||
// extras and stays under uint32max.
|
||||
"libarchive-store-just-under-4g": true,
|
||||
|
||||
// The old archive/zip writer differs from the current writer on
|
||||
// every Zip64-using entry: it always wrote a fixed 24-byte Zip64
|
||||
// extra with all three sub-fields (usize, csize, offset) and set
|
||||
// both 32-bit size fields to 0xFFFFFFFF whenever the per-entry
|
||||
// trigger fired; it also set the EOCD records/size/offset to the
|
||||
// placeholder values whenever EOCD64 was present.
|
||||
"go126-store-5g": true,
|
||||
"go126-deflate-zeros-5g": true,
|
||||
"go126-store-4g-minus-1": true,
|
||||
"go126-store-4g-minus-2": true,
|
||||
"go126-store-exact-4g": true,
|
||||
"go126-offset-past-4g": true,
|
||||
"go126-offset-eq-4g": true,
|
||||
"go126-store-just-under-4g": false,
|
||||
}
|
||||
|
||||
for _, path := range matches {
|
||||
name := strings.TrimSuffix(filepath.Base(path), ".zsparse")
|
||||
t.Run(name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
goldenSF, err := readSparseFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("read golden: %v", err)
|
||||
}
|
||||
goldenData, goldenBase := goldenSF.materializeTail(tailKeep)
|
||||
golden, err := parseCD(goldenData, goldenBase)
|
||||
if err != nil {
|
||||
t.Fatalf("parse golden CD: %v", err)
|
||||
}
|
||||
|
||||
// Verify the production Reader can parse the full golden.
|
||||
checkReaderMatchesSnapshot(t, "golden", goldenSF, golden)
|
||||
|
||||
oursSF := reproduceCD(t, golden)
|
||||
oursData, oursBase := oursSF.materializeTail(tailKeep)
|
||||
got, err := parseCD(oursData, oursBase)
|
||||
if err != nil {
|
||||
t.Fatalf("parse reproduced CD: %v\nbytes:\n%s", err, hexDump(oursData))
|
||||
}
|
||||
// Verify the production Reader can parse archive/zip's own
|
||||
// output and gets the same view of the entries.
|
||||
checkReaderMatchesSnapshot(t, "reproduced", oursSF, got)
|
||||
|
||||
if expectedDiff[name] {
|
||||
var cap captureReporter
|
||||
compareCDSnapshots(&cap, golden, got)
|
||||
if !cap.failed {
|
||||
t.Errorf("expected this golden to fail equivalence, but it passed")
|
||||
} else {
|
||||
t.Logf("expected mismatch:\n%s", indent(cap.msg.String(), " "))
|
||||
}
|
||||
return
|
||||
}
|
||||
compareCDSnapshots(t, golden, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// errReporter is the minimal reporting surface that [compareCDSnapshots]
// needs from a [testing.TB]. Keeping it this narrow lets a test pass a
// [captureReporter] instead, which records mismatches for expected-failure
// cases rather than failing the enclosing test.
type errReporter interface {
	Helper()
	Errorf(format string, args ...any)
}
|
||||
|
||||
// captureReporter is an [errReporter] that collects reported errors
// instead of failing a test. The zero value is ready to use.
type captureReporter struct {
	failed bool            // set by the first call to Errorf
	msg    strings.Builder // every reported message, one per line
}

// Errorf marks the reporter as failed and appends the formatted message,
// terminated by a newline, to the captured output.
func (c *captureReporter) Errorf(format string, args ...any) {
	c.msg.WriteString(fmt.Sprintf(format+"\n", args...))
	c.failed = true
}

// Helper does nothing; it exists only so captureReporter satisfies
// [errReporter].
func (c *captureReporter) Helper() {}
|
||||
|
||||
// checkReaderMatchesSnapshot opens the archive backed by the sparseFile
|
||||
// using the production [NewReader] and asserts that the entry list it
|
||||
// returns matches the [cdSnapshot] (entry count, names, resolved 64-bit
|
||||
// sizes).
|
||||
func checkReaderMatchesSnapshot(t *testing.T, label string, f *sparseFile, snap *cdSnapshot) {
|
||||
t.Helper()
|
||||
zr, err := NewReader(f, f.Size)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: NewReader: %v", label, err)
|
||||
}
|
||||
if g, w := len(zr.File), len(snap.Entries); g != w {
|
||||
t.Errorf("%s: NewReader returned %d files, parseCD found %d", label, g, w)
|
||||
return
|
||||
}
|
||||
for i, f := range zr.File {
|
||||
want := &snap.Entries[i]
|
||||
if f.Name != want.Name {
|
||||
t.Errorf("%s entry %d: Name = %q, want %q", label, i, f.Name, want.Name)
|
||||
}
|
||||
if f.UncompressedSize64 != want.USize64 {
|
||||
t.Errorf("%s entry %d %q: UncompressedSize64 = %d, want %d", label, i, want.Name, f.UncompressedSize64, want.USize64)
|
||||
}
|
||||
if f.CompressedSize64 != want.CSize64 {
|
||||
t.Errorf("%s entry %d %q: CompressedSize64 = %d, want %d", label, i, want.Name, f.CompressedSize64, want.CSize64)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// indent returns s with prefix inserted at the start of every line.
//
// Trailing newlines of s are dropped before splitting, and the result
// always ends in exactly one newline. An empty s is returned unchanged.
func indent(s, prefix string) string {
	if len(s) == 0 {
		return ""
	}
	var out strings.Builder
	for _, line := range strings.Split(strings.TrimRight(s, "\n"), "\n") {
		out.WriteString(prefix)
		out.WriteString(line)
		out.WriteByte('\n')
	}
	return out.String()
}
|
||||
|
||||
// reproduceCD writes a zip archive with the same logical entries as golden
|
||||
// into a [sparseBuffer] (which drops all-zero chunks, so pushing multi-GiB
|
||||
// streams of zeros through the writer is essentially free) and returns the
|
||||
// resulting [sparseFile].
|
||||
//
|
||||
// For entries where compressed == uncompressed (Store, or other 1:1 cases)
|
||||
// we drive the Writer through [Writer.CreateHeader] so that the data
|
||||
// descriptor, offset accounting, and Close-time CD emission all exercise
|
||||
// the production streaming path. The CRC32 hasher is replaced with
|
||||
// [fakeHash32] to avoid hashing many GiB of zeros.
|
||||
//
|
||||
// For entries where compressed ≪ uncompressed (Method=Deflate over zeros),
|
||||
// actually deflating multi-GiB streams at test time is prohibitively slow,
|
||||
// so we fall back to [Writer.CreateRaw] and declare the sizes directly.
|
||||
// The Central Directory output is identical either way.
|
||||
func reproduceCD(t *testing.T, golden *cdSnapshot) *sparseFile {
|
||||
t.Helper()
|
||||
sb := &sparseBuffer{}
|
||||
w := NewWriter(sb)
|
||||
for i, e := range golden.Entries {
|
||||
if e.CSize64 == e.USize64 {
|
||||
fh := &FileHeader{Name: e.Name, Method: e.Method}
|
||||
fw, err := w.CreateHeader(fh)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateHeader[%d %q]: %v", i, e.Name, err)
|
||||
}
|
||||
fw.(*fileWriter).crc32 = fakeHash32{}
|
||||
if _, err := io.CopyN(fw, zeros{}, int64(e.USize64)); err != nil {
|
||||
t.Fatalf("CopyN[%d %q]: %v", i, e.Name, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
fh := &FileHeader{
|
||||
Name: e.Name,
|
||||
Method: e.Method,
|
||||
CompressedSize64: e.CSize64,
|
||||
UncompressedSize64: e.USize64,
|
||||
}
|
||||
fw, err := w.CreateRaw(fh)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateRaw[%d %q]: %v", i, e.Name, err)
|
||||
}
|
||||
if _, err := io.CopyN(fw, zeros{}, int64(e.CSize64)); err != nil {
|
||||
t.Fatalf("CopyN[%d %q]: %v", i, e.Name, err)
|
||||
}
|
||||
}
|
||||
if err := w.Close(); err != nil {
|
||||
t.Fatalf("Close: %v", err)
|
||||
}
|
||||
return &sb.f
|
||||
}
|
||||
|
||||
// compareCDSnapshots asserts that got matches want on Zip64-relevant fields.
|
||||
//
|
||||
// Per-entry size fields (RawCSize, RawUSize, CSize64, USize64) are compared
|
||||
// exactly — we feed them in from the golden when reproducing, so the writer
|
||||
// has no excuse to disagree. Per-entry RawOffset and the EOCD records/size/
|
||||
// offset fields are compared only as placeholder-or-not: their absolute
|
||||
// values depend on producer-specific LFH layout (Info-ZIP packs sizes into
|
||||
// the LFH; archive/zip's streaming path uses a data descriptor; libarchive
|
||||
// adds UT extras) and that's not what this test is pinning down.
|
||||
func compareCDSnapshots(t errReporter, want, got *cdSnapshot) {
|
||||
t.Helper()
|
||||
if g, w := len(got.Entries), len(want.Entries); g != w {
|
||||
t.Errorf("entry count = %d, want %d", g, w)
|
||||
return
|
||||
}
|
||||
for i := range want.Entries {
|
||||
we, ge := &want.Entries[i], &got.Entries[i]
|
||||
// csize and usize come from the declared FileHeader values, so the
|
||||
// raw 32-bit fields must match exactly (real value vs. placeholder
|
||||
// choice and, when not placeholder, the value itself).
|
||||
if we.RawCSize != ge.RawCSize {
|
||||
t.Errorf("entry %d %q: RawCSize = %#08x, want %#08x", i, we.Name, ge.RawCSize, we.RawCSize)
|
||||
}
|
||||
if we.RawUSize != ge.RawUSize {
|
||||
t.Errorf("entry %d %q: RawUSize = %#08x, want %#08x", i, we.Name, ge.RawUSize, we.RawUSize)
|
||||
}
|
||||
// Resolved csize/usize must match — we fed them in from the golden.
|
||||
if we.CSize64 != ge.CSize64 {
|
||||
t.Errorf("entry %d %q: CSize64 = %d, want %d", i, we.Name, ge.CSize64, we.CSize64)
|
||||
}
|
||||
if we.USize64 != ge.USize64 {
|
||||
t.Errorf("entry %d %q: USize64 = %d, want %d", i, we.Name, ge.USize64, we.USize64)
|
||||
}
|
||||
// Offset is layout-dependent. Compare placeholder-or-not, not value.
|
||||
if isPlaceholder32(we.RawOffset) != isPlaceholder32(ge.RawOffset) {
|
||||
t.Errorf("entry %d %q: RawOffset placeholder = %#08x, want %#08x", i, we.Name, ge.RawOffset, we.RawOffset)
|
||||
}
|
||||
|
||||
// Zip64 sub-field presence/order, must match exactly.
|
||||
if !slices.Equal(we.Z64ExtraFields, ge.Z64ExtraFields) {
|
||||
t.Errorf("entry %d %q: Zip64 sub-field order = %v, want %v", i, we.Name, ge.Z64ExtraFields, we.Z64ExtraFields)
|
||||
}
|
||||
// ReaderVersion ≥ 45 whenever a Zip64 extra is present.
|
||||
if len(we.Z64ExtraFields) > 0 && ge.ReaderVersion < zipVersion45 {
|
||||
t.Errorf("entry %d %q: ReaderVersion = %d, want ≥ %d (Zip64 extra present)", i, we.Name, ge.ReaderVersion, zipVersion45)
|
||||
}
|
||||
}
|
||||
|
||||
// EOCD: compare placeholder-or-not for each field. Exact values are
|
||||
// layout-dependent.
|
||||
if isPlaceholder16(want.EOCD.Records) != isPlaceholder16(got.EOCD.Records) {
|
||||
t.Errorf("EOCD records placeholder = %#x, want %#x", got.EOCD.Records, want.EOCD.Records)
|
||||
}
|
||||
if isPlaceholder32(want.EOCD.Size) != isPlaceholder32(got.EOCD.Size) {
|
||||
t.Errorf("EOCD size placeholder = %#x, want %#x", got.EOCD.Size, want.EOCD.Size)
|
||||
}
|
||||
if isPlaceholder32(want.EOCD.Offset) != isPlaceholder32(got.EOCD.Offset) {
|
||||
t.Errorf("EOCD offset placeholder = %#x, want %#x", got.EOCD.Offset, want.EOCD.Offset)
|
||||
}
|
||||
|
||||
if got.HasEOCD64 != want.HasEOCD64 {
|
||||
t.Errorf("EOCD64 present = %v, want %v", got.HasEOCD64, want.HasEOCD64)
|
||||
}
|
||||
if want.HasEOCD64 && got.HasEOCD64 {
|
||||
if got.EOCD64.Records != want.EOCD64.Records {
|
||||
t.Errorf("EOCD64 records = %d, want %d", got.EOCD64.Records, want.EOCD64.Records)
|
||||
}
|
||||
// EOCD64.Size and EOCD64.Offset are layout-dependent.
|
||||
}
|
||||
}
|
||||
|
||||
// isPlaceholder32 reports whether v equals uint32max, the value a 32-bit
// size or offset field holds when the real value is stored in a Zip64
// record instead.
func isPlaceholder32(v uint32) bool {
	return v == uint32max
}
|
||||
// isPlaceholder16 reports whether v equals uint16max, the value the 16-bit
// EOCD record count holds when it is a placeholder.
func isPlaceholder16(v uint16) bool {
	return v == uint16max
}
|
||||
|
||||
// CD snapshot types and parser
|
||||
|
||||
// zip64SubID names one of the three sub-fields that a Zip64
// extended-information extra field may carry. The constants are declared
// in the spec-defined order of those sub-fields.
type zip64SubID int

const (
	z64USize  zip64SubID = 1 + iota // uncompressed size
	z64CSize                        // compressed size
	z64Offset                       // offset
)

// String returns "usize", "csize" or "offset" for the known sub-fields and
// "zip64SubID(N)" for any other value.
func (s zip64SubID) String() string {
	// Index 0 is deliberately left empty: the constants start at 1.
	names := [...]string{
		z64USize:  "usize",
		z64CSize:  "csize",
		z64Offset: "offset",
	}
	if s >= z64USize && int(s) < len(names) {
		return names[s]
	}
	return fmt.Sprintf("zip64SubID(%d)", int(s))
}
|
||||
|
||||
type cdEntry struct {
|
||||
Name string
|
||||
Method uint16
|
||||
ReaderVersion uint16
|
||||
|
||||
// Raw 32-bit fields from the CD record. A value of 0xFFFFFFFF indicates
|
||||
// the real value is in the Zip64 extended-information extra field.
|
||||
RawCSize uint32
|
||||
RawUSize uint32
|
||||
RawOffset uint32
|
||||
|
||||
// Resolved 64-bit values (from the 32-bit field if not a placeholder,
|
||||
// otherwise from the Zip64 extra).
|
||||
CSize64 uint64
|
||||
USize64 uint64
|
||||
Offset64 uint64
|
||||
|
||||
// Sub-fields present in the Zip64 extra, in the order they appear.
|
||||
Z64ExtraFields []zip64SubID
|
||||
}
|
||||
|
||||
// eocdRec holds the raw record-count, size and offset fields of the
// classic End of Central Directory record. A field carrying its all-ones
// value is a placeholder.
type eocdRec struct {
	Records uint16 // record count; 0xFFFF when it is a placeholder
	Size    uint32 // size field; 0xFFFFFFFF when it is a placeholder
	Offset  uint32 // offset field; 0xFFFFFFFF when it is a placeholder
}
|
||||
|
||||
// eocd64Rec holds the 64-bit record-count, size and offset fields of the
// Zip64 End of Central Directory record.
type eocd64Rec struct {
	Records, Size, Offset uint64
}
|
||||
|
||||
type cdSnapshot struct {
|
||||
Entries []cdEntry
|
||||
EOCD eocdRec
|
||||
HasEOCD64 bool
|
||||
EOCD64 eocd64Rec
|
||||
}
|
||||
|
||||
// le is shorthand for the little-endian byte order used to decode the
// integer fields read in this file.
var le = binary.LittleEndian
|
||||
|
||||
// parseCD parses the Central Directory and EOCD records of a zip archive
|
||||
// from its raw bytes. data must be the tail of the archive, with baseOffset
|
||||
// indicating where data[0] sits in the original archive (0 for whole-archive
|
||||
// input).
|
||||
func parseCD(data []byte, baseOffset uint64) (*cdSnapshot, error) {
|
||||
sigOff, err := findEOCD(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
snap := &cdSnapshot{}
|
||||
snap.EOCD.Records = le.Uint16(data[sigOff+10:])
|
||||
snap.EOCD.Size = le.Uint32(data[sigOff+12:])
|
||||
snap.EOCD.Offset = le.Uint32(data[sigOff+16:])
|
||||
|
||||
dirOffset := uint64(snap.EOCD.Offset)
|
||||
nRecords := uint64(snap.EOCD.Records)
|
||||
|
||||
// toData converts an absolute archive offset to a data slice offset,
|
||||
// returning false if it lies before our captured tail.
|
||||
toData := func(absOff uint64) (uint64, bool) {
|
||||
if absOff < baseOffset {
|
||||
return 0, false
|
||||
}
|
||||
return absOff - baseOffset, true
|
||||
}
|
||||
|
||||
// Look for an EOCD64 locator immediately preceding the EOCD record.
|
||||
if sigOff >= directory64LocLen {
|
||||
locOff := sigOff - directory64LocLen
|
||||
if le.Uint32(data[locOff:]) == directory64LocSignature {
|
||||
eocd64Off := le.Uint64(data[locOff+8:])
|
||||
eocd64DataOff, ok := toData(eocd64Off)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("zip: EOCD64 at %#x before captured tail (base %#x)", eocd64Off, baseOffset)
|
||||
}
|
||||
if eocd64DataOff+directory64EndLen > uint64(len(data)) {
|
||||
return nil, errors.New("zip: EOCD64 offset out of range")
|
||||
}
|
||||
if le.Uint32(data[eocd64DataOff:]) != directory64EndSignature {
|
||||
return nil, errors.New("zip: EOCD64 signature mismatch")
|
||||
}
|
||||
snap.HasEOCD64 = true
|
||||
snap.EOCD64.Records = le.Uint64(data[eocd64DataOff+32:])
|
||||
snap.EOCD64.Size = le.Uint64(data[eocd64DataOff+40:])
|
||||
snap.EOCD64.Offset = le.Uint64(data[eocd64DataOff+48:])
|
||||
dirOffset = snap.EOCD64.Offset
|
||||
nRecords = snap.EOCD64.Records
|
||||
}
|
||||
}
|
||||
|
||||
off, ok := toData(dirOffset)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("zip: CD at %#x before captured tail (base %#x)", dirOffset, baseOffset)
|
||||
}
|
||||
for i := uint64(0); i < nRecords; i++ {
|
||||
if off+directoryHeaderLen > uint64(len(data)) {
|
||||
return nil, fmt.Errorf("zip: CD entry %d out of range", i)
|
||||
}
|
||||
rec := data[off:]
|
||||
if le.Uint32(rec) != directoryHeaderSignature {
|
||||
return nil, fmt.Errorf("zip: bad CD signature at offset %d", off)
|
||||
}
|
||||
var e cdEntry
|
||||
e.ReaderVersion = le.Uint16(rec[6:])
|
||||
e.Method = le.Uint16(rec[10:])
|
||||
e.RawCSize = le.Uint32(rec[20:])
|
||||
e.RawUSize = le.Uint32(rec[24:])
|
||||
nameLen := uint64(le.Uint16(rec[28:]))
|
||||
extraLen := uint64(le.Uint16(rec[30:]))
|
||||
commLen := uint64(le.Uint16(rec[32:]))
|
||||
e.RawOffset = le.Uint32(rec[42:])
|
||||
|
||||
recLen := uint64(directoryHeaderLen) + nameLen + extraLen + commLen
|
||||
if off+recLen > uint64(len(data)) {
|
||||
return nil, fmt.Errorf("zip: CD entry %d truncated", i)
|
||||
}
|
||||
nameOff := off + directoryHeaderLen
|
||||
extraOff := nameOff + nameLen
|
||||
e.Name = string(data[nameOff:extraOff])
|
||||
extra := data[extraOff : extraOff+extraLen]
|
||||
|
||||
e.CSize64 = uint64(e.RawCSize)
|
||||
e.USize64 = uint64(e.RawUSize)
|
||||
e.Offset64 = uint64(e.RawOffset)
|
||||
|
||||
// Walk extra fields; consume the Zip64 sub-field if present.
|
||||
// Per the spec and Info-ZIP convention, the Zip64 extra contains
|
||||
// 8-byte values for exactly the size/offset fields whose 32-bit
|
||||
// counterpart is 0xFFFFFFFF, in the order: USize, CSize, Offset.
|
||||
for len(extra) >= 4 {
|
||||
tag := le.Uint16(extra)
|
||||
size := uint64(le.Uint16(extra[2:]))
|
||||
if 4+size > uint64(len(extra)) {
|
||||
break
|
||||
}
|
||||
field := extra[4 : 4+size]
|
||||
extra = extra[4+size:]
|
||||
if tag != zip64ExtraID {
|
||||
continue
|
||||
}
|
||||
if e.RawUSize == uint32max && len(field) >= 8 {
|
||||
e.USize64 = le.Uint64(field)
|
||||
e.Z64ExtraFields = append(e.Z64ExtraFields, z64USize)
|
||||
field = field[8:]
|
||||
}
|
||||
if e.RawCSize == uint32max && len(field) >= 8 {
|
||||
e.CSize64 = le.Uint64(field)
|
||||
e.Z64ExtraFields = append(e.Z64ExtraFields, z64CSize)
|
||||
field = field[8:]
|
||||
}
|
||||
if e.RawOffset == uint32max && len(field) >= 8 {
|
||||
e.Offset64 = le.Uint64(field)
|
||||
e.Z64ExtraFields = append(e.Z64ExtraFields, z64Offset)
|
||||
field = field[8:]
|
||||
}
|
||||
}
|
||||
|
||||
snap.Entries = append(snap.Entries, e)
|
||||
off += recLen
|
||||
}
|
||||
return snap, nil
|
||||
}
|
||||
|
||||
// findEOCD locates the EOCD record by scanning back from the end of data,
|
||||
// matching both the signature and the trailing comment-length field.
|
||||
func findEOCD(data []byte) (uint64, error) {
|
||||
if len(data) < directoryEndLen {
|
||||
return 0, errors.New("zip: too short for EOCD")
|
||||
}
|
||||
maxComment := uint16max
|
||||
lo := len(data) - directoryEndLen
|
||||
hi := lo
|
||||
if hi > maxComment {
|
||||
lo = hi - maxComment
|
||||
} else {
|
||||
lo = 0
|
||||
}
|
||||
for i := hi; i >= lo; i-- {
|
||||
if le.Uint32(data[i:]) != directoryEndSignature {
|
||||
continue
|
||||
}
|
||||
cl := int(le.Uint16(data[i+20:]))
|
||||
if i+directoryEndLen+cl == len(data) {
|
||||
return uint64(i), nil
|
||||
}
|
||||
}
|
||||
return 0, errors.New("zip: EOCD not found")
|
||||
}
|
||||
|
||||
// hexDump formats data as hex for failure messages, 16 bytes per line,
// each line led by its offset as four hex digits. Only the last 4096
// bytes of data are dumped, and offsets are relative to the start of that
// tail.
func hexDump(data []byte) string {
	const (
		maxBytes = 4096
		perLine  = 16
	)
	if excess := len(data) - maxBytes; excess > 0 {
		data = data[excess:]
	}
	var sb strings.Builder
	for off := 0; off < len(data); off += perLine {
		row := data[off:]
		if len(row) > perLine {
			row = row[:perLine]
		}
		fmt.Fprintf(&sb, "%04x % x\n", off, row)
	}
	return sb.String()
}
|
||||
|
||||
// TestZip64LFHBothPlaceholders covers the [Writer.CreateRaw] + no-data-
|
||||
// descriptor path where the entry's uncompressed or compressed size exceeds
|
||||
// 4 GiB. The Local File Header carries a Zip64 extra with both 8-byte
|
||||
// USize64 and CSize64 sub-fields (matching Info-ZIP), so per APPNOTE 4.5.3
|
||||
// both 32-bit size fields in the LFH must be the 0xFFFFFFFF placeholder —
|
||||
// even if only one of the sizes actually overflows.
|
||||
func TestZip64LFHBothPlaceholders(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
w := NewWriter(&buf)
|
||||
fh := &FileHeader{
|
||||
Name: "x",
|
||||
Method: Deflate,
|
||||
CompressedSize64: 1024,
|
||||
UncompressedSize64: 5 << 30, // > 4 GiB
|
||||
}
|
||||
fw, err := w.CreateRaw(fh)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := io.CopyN(fw, zeros{}, int64(fh.CompressedSize64)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := w.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
b := buf.Bytes()
|
||||
if got := le.Uint32(b[14:18]); got != fh.CRC32 {
|
||||
t.Errorf("LFH CRC32 = %#x, want %#x", got, fh.CRC32)
|
||||
}
|
||||
if got := le.Uint32(b[18:22]); got != uint32max {
|
||||
t.Errorf("LFH CompressedSize = %#x, want %#x (placeholder)", got, uint32(uint32max))
|
||||
}
|
||||
if got := le.Uint32(b[22:26]); got != uint32max {
|
||||
t.Errorf("LFH UncompressedSize = %#x, want %#x (placeholder)", got, uint32(uint32max))
|
||||
}
|
||||
|
||||
// The Zip64 LFH extra should carry both 64-bit sub-fields in
|
||||
// USize64-then-CSize64 order.
|
||||
nameLen := uint64(le.Uint16(b[26:28]))
|
||||
extraLen := uint64(le.Uint16(b[28:30]))
|
||||
if want := uint64(20); extraLen != want {
|
||||
t.Fatalf("LFH extra length = %d, want %d", extraLen, want)
|
||||
}
|
||||
extra := b[30+nameLen : 30+nameLen+extraLen]
|
||||
if tag := le.Uint16(extra[:2]); tag != zip64ExtraID {
|
||||
t.Errorf("Zip64 extra tag = %#x, want %#x", tag, zip64ExtraID)
|
||||
}
|
||||
if dataLen := le.Uint16(extra[2:4]); dataLen != 16 {
|
||||
t.Errorf("Zip64 extra data length = %d, want 16", dataLen)
|
||||
}
|
||||
if got := le.Uint64(extra[4:12]); got != fh.UncompressedSize64 {
|
||||
t.Errorf("Zip64 USize64 = %d, want %d", got, fh.UncompressedSize64)
|
||||
}
|
||||
if got := le.Uint64(extra[12:20]); got != fh.CompressedSize64 {
|
||||
t.Errorf("Zip64 CSize64 = %d, want %d", got, fh.CompressedSize64)
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue