index/suffixarray: report error rather than panic for corrupted data

If the encoded suffixarray data had more indices than the (suffix) array length, Index.Read panicked with an index-out-of-bounds error. Guard against that panic and report an error instead. While at it, change the error string used for other errors from "data too large" to "data corrupted", which is more versatile. Add a test case with a detailed description. Fixes #53352. Change-Id: I9b1ba04c408b8d51943f7a29abf429fbe743fd3d Reviewed-on: https://go-review.googlesource.com/c/go/+/771781 Reviewed-by: Mark Freeman <markfreeman@google.com> LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Robert Griesemer <gri@google.com> Auto-Submit: Robert Griesemer <gri@google.com>
2026-06-27 03:11:23 +00:00 · 2026-04-28 14:51:14 -07:00 · 2026-04-28 14:51:14 -07:00 · 2098279730
commit 2098279730
parent 4e4b780652
2 changed files with 57 additions and 3 deletions
--- a/src/index/suffixarray/suffixarray.go
+++ b/src/index/suffixarray/suffixarray.go
@@ -117,7 +117,7 @@ func writeSlice(w io.Writer, buf []byte, data ints) (n int, err error) {
 	return
 }

-var errTooBig = errors.New("suffixarray: data too large")
+var errCorrupted = errors.New("suffixarray: data corrupted")

 // readSlice reads data[:n] from r and returns n.
 // It uses buf to buffer the read.
@@ -130,7 +130,7 @@ func readSlice(r io.Reader, buf []byte, data ints) (n int, err error) {
 	}
 	if int64(int(size64)) != size64 || int(size64) < 0 {
 		// We never write chunks this big anyway.
-		return 0, errTooBig
+		return 0, errCorrupted
 	}
 	size := int(size64)

@@ -140,8 +140,14 @@ func readSlice(r io.Reader, buf []byte, data ints) (n int, err error) {
 	}

 	// decode as many elements as present in buf
+	len := data.len()
 	for p := binary.MaxVarintLen64; p < size; n++ {
 		x, w := binary.Uvarint(buf[p:])
+		// prevent index-out-of-bounds panic if there are more indices than expected
+		// (was go.dev/issue/53352)
+		if n >= len {
+			return n, errCorrupted
+		}
 		data.set(n, int64(x))
 		p += w
 	}
@@ -162,7 +168,7 @@ func (x *Index) Read(r io.Reader) error {
 		return err
 	}
 	if int64(int(n64)) != n64 || int(n64) < 0 {
-		return errTooBig
+		return errCorrupted
 	}
 	n := int(n64)

--- a/src/index/suffixarray/suffixarray_test.go
+++ b/src/index/suffixarray/suffixarray_test.go
@@ -614,3 +614,51 @@ func BenchmarkSaveRestore(b *testing.B) {
 		})
 	}
 }
+
+func TestIssue53352(t *testing.T) {
+	data := []byte("x")
+	index := New(data)
+	var buf bytes.Buffer
+	if err := index.Write(&buf); err != nil {
+		t.Fatal(err)
+	}
+
+	// buffer encoding is as follows:
+	//
+	// [ data length n | data bytes | index slice buffer size |  indices  ] ... (next slice, if any)
+	// \__ 10 bytes __/\_ n bytes _/\_______ 10 bytes _______/\_ varuints /
+	//
+	// n and the index slice buffer size s are always encoded as varints using 10 bytes so they can be patched.
+	// For small values x >= 0 the varint encoded value is 2*x.
+
+	// For the above data we have n == len("x") == 1.
+	n := len(data)
+	encoding := buf.Bytes()
+	if got := int(encoding[0]); got != 2*n {
+		t.Fatalf("got n = %d; want %d", got, 2*n)
+	}
+
+	// For the above data, the index slice buffer holds the 10-byte size field followed by
+	// a single index entry (0 for "x"), so s == 10 + 1 == 11; s is encoded immediately
+	// following the data bytes.
+	s := 10 + 1
+	if got := int(encoding[10+n]); got != 2*s {
+		t.Fatalf("got s = %d; want %d", got, 2*s)
+	}
+
+	// Reading back the encoding should work without errors.
+	if err := index.Read(bytes.NewBuffer(encoding)); err != nil {
+		t.Fatal(err)
+	}
+
+	// Adding an extra index corrupts the encoding (more indices than data bytes to be indexed).
+	s++                            // increase slice buffer size
+	encoding = append(encoding, 0) // add one more index
+	encoding[10+n] = byte(2 * s)   // update index slice buffer size
+
+	// Reading back the corrupted encoding should report an error.
+	// Before fixing go.dev/issue/53352, this resulted in an index-out-of-range panic.
+	if err := index.Read(bytes.NewBuffer(encoding)); err != errCorrupted {
+		t.Fatalf("got %q; want %q", err, errCorrupted)
+	}
+}