index/suffixarray: report error rather than panic for corrupted data

If the encoded suffixarray data had more indices than the (suffix)
array length, Index.Read panicked with an index-out-of-bounds error.
Guard against that panic and report an error instead.

While at it, change the error string used for other errors from
"data too large" to "data corrupted", which is more versatile.

Add a test case with a detailed description.

Fixes #53352.

Change-Id: I9b1ba04c408b8d51943f7a29abf429fbe743fd3d
Reviewed-on: https://go-review.googlesource.com/c/go/+/771781
Reviewed-by: Mark Freeman <markfreeman@google.com>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Robert Griesemer <gri@google.com>
Auto-Submit: Robert Griesemer <gri@google.com>
This commit is contained in:
Robert Griesemer 2026-04-28 14:51:14 -07:00 committed by Gopher Robot
parent 4e4b780652
commit 2098279730
2 changed files with 57 additions and 3 deletions

View file

@ -117,7 +117,7 @@ func writeSlice(w io.Writer, buf []byte, data ints) (n int, err error) {
return
}
var errTooBig = errors.New("suffixarray: data too large")
var errCorrupted = errors.New("suffixarray: data corrupted")
// readSlice reads data[:n] from r and returns n.
// It uses buf to buffer the read.
@ -130,7 +130,7 @@ func readSlice(r io.Reader, buf []byte, data ints) (n int, err error) {
}
if int64(int(size64)) != size64 || int(size64) < 0 {
// We never write chunks this big anyway.
return 0, errTooBig
return 0, errCorrupted
}
size := int(size64)
@ -140,8 +140,14 @@ func readSlice(r io.Reader, buf []byte, data ints) (n int, err error) {
}
// decode as many elements as present in buf
len := data.len()
for p := binary.MaxVarintLen64; p < size; n++ {
x, w := binary.Uvarint(buf[p:])
// prevent index-out-of-bounds panic if there are more indices than expected
// (was go.dev/issue/53352)
if n >= len {
return n, errCorrupted
}
data.set(n, int64(x))
p += w
}
@ -162,7 +168,7 @@ func (x *Index) Read(r io.Reader) error {
return err
}
if int64(int(n64)) != n64 || int(n64) < 0 {
return errTooBig
return errCorrupted
}
n := int(n64)

View file

@ -614,3 +614,51 @@ func BenchmarkSaveRestore(b *testing.B) {
})
}
}
// TestIssue53352 verifies that Index.Read reports errCorrupted, rather than
// panicking, when the serialized index holds more indices than there are
// data bytes to index (go.dev/issue/53352).
func TestIssue53352(t *testing.T) {
	data := []byte("x")
	index := New(data)
	var buf bytes.Buffer
	if err := index.Write(&buf); err != nil {
		t.Fatal(err)
	}

	// buffer encoding is as follows:
	//
	// [ data length n | data bytes | index slice buffer size | indices ] ... (next slice, if any)
	// \__ 10 bytes __/\_ n bytes _/\_______ 10 bytes _______/\_ varuints /
	//
	// n and s are encoded as varints using 10 bytes always so they can be patched.
	// For small values x >= 0 the varint encoded value is 2*x.

	// For the above data we have n == len("x") == 1.
	n := len(data)
	encoding := buf.Bytes()
	if got := int(encoding[0]); got != 2*n {
		t.Fatalf("got n = %d; want %d", got, 2*n)
	}

	// For the above data, the index slice buffer contains a single index entry (0 for "x")
	// plus the size of the index buffer (10 bytes), so s == 10 + 1 == 11; and s is encoded
	// immediately following the data bytes.
	s := 10 + 1
	if got := int(encoding[10+n]); got != 2*s {
		t.Fatalf("got s = %d; want %d", got, 2*s)
	}

	// Reading back the encoding should work without errors.
	if err := index.Read(bytes.NewBuffer(encoding)); err != nil {
		t.Fatal(err)
	}

	// Adding an extra index corrupts the encoding (more indices than data bytes to be indexed).
	s++                            // increase slice buffer size
	encoding = append(encoding, 0) // add one more index
	encoding[10+n] = byte(2 * s)   // update index slice buffer size

	// Reading back the corrupted encoding should report an error.
	// Before fixing go.dev/issue/53352, this resulted in an index-out-of-range panic.
	// The errors are formatted with %v rather than %q so that a nil error
	// (Read wrongly accepting the data) prints as <nil> instead of %!q(<nil>).
	if err := index.Read(bytes.NewBuffer(encoding)); err != errCorrupted {
		t.Fatalf("got %v; want %v", err, errCorrupted)
	}
}