encoding/json/v2: report EOF for top-level values in UnmarshalDecode

The fully streaming UnmarshalJSONFrom method and UnmarshalFromFunc
introduce an edge case: they can encounter EOF in the stream,
in which case it should be reported upstream as EOF rather than
as ErrUnexpectedEOF or wrapped within a SemanticError.

This is not possible with other unmarshal methods since the
"json" package would read the appropriate JSON value
before calling the custom method or function.

To prevent custom unmarshal methods from encountering EOF,
check whether the stream is already at EOF for top-level values
before calling the custom method.

Also, when wrapping EOF within a SemanticError, convert it
to ErrUnexpectedEOF to better indicate that this is unexpected.

Fixes #75802

Change-Id: I001396734b7e95b5337f77b71326284974ee730a
Reviewed-on: https://go-review.googlesource.com/c/go/+/710877
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Johan Brandhorst-Satzkorn <johan.brandhorst@gmail.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
This commit is contained in:
Joe Tsai 2025-10-10 17:56:04 -07:00 committed by Joseph Tsai
parent 6bcd97d9f4
commit 0e64ee1286
6 changed files with 68 additions and 5 deletions

View file

@ -792,6 +792,12 @@ func (d *decoderState) CheckNextValue(last bool) error {
return nil return nil
} }
// AtEOF reports whether the decoder is at EOF.
//
// It calls consumeWhitespace starting at d.prevEnd and returns true only
// when that call fails with exactly io.ErrUnexpectedEOF; a nil error or any
// other error yields false.
func (d *decoderState) AtEOF() bool {
	if _, err := d.consumeWhitespace(d.prevEnd); err == io.ErrUnexpectedEOF {
		return true
	}
	return false
}
// CheckEOF verifies that the input has no more data. // CheckEOF verifies that the input has no more data.
func (d *decoderState) CheckEOF() error { func (d *decoderState) CheckEOF() error {
return d.checkEOF(d.prevEnd) return d.checkEOF(d.prevEnd)

View file

@ -440,8 +440,9 @@ func UnmarshalRead(in io.Reader, out any, opts ...Options) (err error) {
// Unlike [Unmarshal] and [UnmarshalRead], decode options are ignored because // Unlike [Unmarshal] and [UnmarshalRead], decode options are ignored because
// they must have already been specified on the provided [jsontext.Decoder]. // they must have already been specified on the provided [jsontext.Decoder].
// //
// The input may be a stream of one or more JSON values, // The input may be a stream of zero or more JSON values,
// where this only unmarshals the next JSON value in the stream. // where this only unmarshals the next JSON value in the stream.
// If there are no more top-level JSON values, it reports [io.EOF].
// The output must be a non-nil pointer. // The output must be a non-nil pointer.
// See [Unmarshal] for details about the conversion of JSON into a Go value. // See [Unmarshal] for details about the conversion of JSON into a Go value.
func UnmarshalDecode(in *jsontext.Decoder, out any, opts ...Options) (err error) { func UnmarshalDecode(in *jsontext.Decoder, out any, opts ...Options) (err error) {

View file

@ -9,6 +9,7 @@ package json
import ( import (
"errors" "errors"
"fmt" "fmt"
"io"
"reflect" "reflect"
"sync" "sync"
@ -306,6 +307,9 @@ func UnmarshalFromFunc[T any](fn func(*jsontext.Decoder, T) error) *Unmarshalers
fnc: func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error { fnc: func(dec *jsontext.Decoder, va addressableValue, uo *jsonopts.Struct) error {
xd := export.Decoder(dec) xd := export.Decoder(dec)
prevDepth, prevLength := xd.Tokens.DepthLength() prevDepth, prevLength := xd.Tokens.DepthLength()
if prevDepth == 1 && xd.AtEOF() {
return io.EOF // check EOF early to avoid fn reporting an EOF
}
xd.Flags.Set(jsonflags.WithinArshalCall | 1) xd.Flags.Set(jsonflags.WithinArshalCall | 1)
v, _ := reflect.TypeAssert[T](va.castTo(t)) v, _ := reflect.TypeAssert[T](va.castTo(t))
err := fn(dec, v) err := fn(dec, v)

View file

@ -9,6 +9,7 @@ package json
import ( import (
"encoding" "encoding"
"errors" "errors"
"io"
"reflect" "reflect"
"encoding/json/internal" "encoding/json/internal"
@ -302,6 +303,9 @@ func makeMethodArshaler(fncs *arshaler, t reflect.Type) *arshaler {
} }
xd := export.Decoder(dec) xd := export.Decoder(dec)
prevDepth, prevLength := xd.Tokens.DepthLength() prevDepth, prevLength := xd.Tokens.DepthLength()
if prevDepth == 1 && xd.AtEOF() {
return io.EOF // check EOF early to avoid fn reporting an EOF
}
xd.Flags.Set(jsonflags.WithinArshalCall | 1) xd.Flags.Set(jsonflags.WithinArshalCall | 1)
unmarshaler, _ := reflect.TypeAssert[UnmarshalerFrom](va.Addr()) unmarshaler, _ := reflect.TypeAssert[UnmarshalerFrom](va.Addr())
err := unmarshaler.UnmarshalJSONFrom(dec) err := unmarshaler.UnmarshalJSONFrom(dec)

View file

@ -7834,6 +7834,7 @@ func TestUnmarshal(t *testing.T) {
wantErr: EU(errSomeError).withType(0, T[unmarshalJSONv2Func]()), wantErr: EU(errSomeError).withType(0, T[unmarshalJSONv2Func]()),
}, { }, {
name: jsontest.Name("Methods/Invalid/JSONv2/TooFew"), name: jsontest.Name("Methods/Invalid/JSONv2/TooFew"),
inBuf: `{}`,
inVal: addr(unmarshalJSONv2Func(func(*jsontext.Decoder) error { inVal: addr(unmarshalJSONv2Func(func(*jsontext.Decoder) error {
return nil // do nothing return nil // do nothing
})), })),
@ -9234,6 +9235,43 @@ func TestUnmarshalReuse(t *testing.T) {
}) })
} }
// unmarshalerEOF is an empty test type whose UnmarshalJSONFrom method
// always fails with io.EOF.
type unmarshalerEOF struct{}

// UnmarshalJSONFrom ignores the decoder and unconditionally returns io.EOF.
func (unmarshalerEOF) UnmarshalJSONFrom(*jsontext.Decoder) error {
	return io.EOF // should be wrapped and converted by Unmarshal to io.ErrUnexpectedEOF
}
// TestUnmarshalEOF verifies that io.EOF is only ever returned by
// UnmarshalDecode for a top-level value.
//
// Every custom unmarshaler exercised here (the unmarshalerEOF method and the
// UnmarshalFromFunc registered for *struct{}) returns io.EOF directly.
// For each input and output type the test expects:
//   - Unmarshal and UnmarshalRead to report an error matching
//     io.ErrUnexpectedEOF for every input, including the empty one;
//   - UnmarshalDecode to report exactly io.EOF for the empty input and an
//     error matching io.ErrUnexpectedEOF otherwise.
func TestUnmarshalEOF(t *testing.T) {
	opts := WithUnmarshalers(UnmarshalFromFunc(func(dec *jsontext.Decoder, _ *struct{}) error {
		return io.EOF // should be wrapped and converted by Unmarshal to io.ErrUnexpectedEOF
	}))
	for _, in := range []string{"", "[", "[null", "[null]"} {
		// Each constructor yields a fresh output value so that no state
		// leaks between the three entry points under test.
		for _, newOut := range []func() any{
			func() any { return new(unmarshalerEOF) },
			func() any { return new([]unmarshalerEOF) },
			func() any { return new(struct{}) },
			func() any { return new([]struct{}) },
		} {
			wantErr := io.ErrUnexpectedEOF

			out := newOut()
			if gotErr := Unmarshal([]byte(in), out, opts); !errors.Is(gotErr, wantErr) {
				t.Errorf("Unmarshal(%q, %T) = %v, want %v", in, out, gotErr, wantErr)
			}

			out = newOut()
			if gotErr := UnmarshalRead(strings.NewReader(in), out, opts); !errors.Is(gotErr, wantErr) {
				t.Errorf("UnmarshalRead(%q, %T) = %v, want %v", in, out, gotErr, wantErr)
			}

			out = newOut()
			switch gotErr := UnmarshalDecode(jsontext.NewDecoder(strings.NewReader(in)), out, opts); {
			case in != "" && !errors.Is(gotErr, wantErr):
				t.Errorf("UnmarshalDecode(%q, %T) = %v, want %v", in, out, gotErr, wantErr)
			case in == "" && gotErr != io.EOF:
				// Only an empty stream at the top level may surface a bare io.EOF.
				t.Errorf("UnmarshalDecode(%q, %T) = %v, want %v", in, out, gotErr, io.EOF)
			}
		}
	}
}
type ReaderFunc func([]byte) (int, error) type ReaderFunc func([]byte) (int, error)
func (f ReaderFunc) Read(b []byte) (int, error) { return f(b) } func (f ReaderFunc) Read(b []byte) (int, error) { return f(b) }

View file

@ -10,6 +10,7 @@ import (
"cmp" "cmp"
"errors" "errors"
"fmt" "fmt"
"io"
"reflect" "reflect"
"strconv" "strconv"
"strings" "strings"
@ -118,7 +119,7 @@ func newInvalidFormatError(c coder, t reflect.Type) error {
// newMarshalErrorBefore wraps err in a SemanticError assuming that e // newMarshalErrorBefore wraps err in a SemanticError assuming that e
// is positioned right before the next token or value, which causes an error. // is positioned right before the next token or value, which causes an error.
func newMarshalErrorBefore(e *jsontext.Encoder, t reflect.Type, err error) error { func newMarshalErrorBefore(e *jsontext.Encoder, t reflect.Type, err error) error {
return &SemanticError{action: "marshal", GoType: t, Err: err, return &SemanticError{action: "marshal", GoType: t, Err: toUnexpectedEOF(err),
ByteOffset: e.OutputOffset() + int64(export.Encoder(e).CountNextDelimWhitespace()), ByteOffset: e.OutputOffset() + int64(export.Encoder(e).CountNextDelimWhitespace()),
JSONPointer: jsontext.Pointer(export.Encoder(e).AppendStackPointer(nil, +1))} JSONPointer: jsontext.Pointer(export.Encoder(e).AppendStackPointer(nil, +1))}
} }
@ -134,7 +135,7 @@ func newUnmarshalErrorBefore(d *jsontext.Decoder, t reflect.Type, err error) err
if export.Decoder(d).Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) { if export.Decoder(d).Flags.Get(jsonflags.ReportErrorsWithLegacySemantics) {
k = d.PeekKind() k = d.PeekKind()
} }
return &SemanticError{action: "unmarshal", GoType: t, Err: err, return &SemanticError{action: "unmarshal", GoType: t, Err: toUnexpectedEOF(err),
ByteOffset: d.InputOffset() + int64(export.Decoder(d).CountNextDelimWhitespace()), ByteOffset: d.InputOffset() + int64(export.Decoder(d).CountNextDelimWhitespace()),
JSONPointer: jsontext.Pointer(export.Decoder(d).AppendStackPointer(nil, +1)), JSONPointer: jsontext.Pointer(export.Decoder(d).AppendStackPointer(nil, +1)),
JSONKind: k} JSONKind: k}
@ -157,7 +158,7 @@ func newUnmarshalErrorBeforeWithSkipping(d *jsontext.Decoder, t reflect.Type, er
// is positioned right after the previous token or value, which caused an error. // is positioned right after the previous token or value, which caused an error.
func newUnmarshalErrorAfter(d *jsontext.Decoder, t reflect.Type, err error) error { func newUnmarshalErrorAfter(d *jsontext.Decoder, t reflect.Type, err error) error {
tokOrVal := export.Decoder(d).PreviousTokenOrValue() tokOrVal := export.Decoder(d).PreviousTokenOrValue()
return &SemanticError{action: "unmarshal", GoType: t, Err: err, return &SemanticError{action: "unmarshal", GoType: t, Err: toUnexpectedEOF(err),
ByteOffset: d.InputOffset() - int64(len(tokOrVal)), ByteOffset: d.InputOffset() - int64(len(tokOrVal)),
JSONPointer: jsontext.Pointer(export.Decoder(d).AppendStackPointer(nil, -1)), JSONPointer: jsontext.Pointer(export.Decoder(d).AppendStackPointer(nil, -1)),
JSONKind: jsontext.Value(tokOrVal).Kind()} JSONKind: jsontext.Value(tokOrVal).Kind()}
@ -206,6 +207,7 @@ func newSemanticErrorWithPosition(c coder, t reflect.Type, prevDepth int, prevLe
if serr == nil { if serr == nil {
serr = &SemanticError{Err: err} serr = &SemanticError{Err: err}
} }
serr.Err = toUnexpectedEOF(serr.Err)
var currDepth int var currDepth int
var currLength int64 var currLength int64
var coderState interface{ AppendStackPointer([]byte, int) []byte } var coderState interface{ AppendStackPointer([]byte, int) []byte }
@ -432,3 +434,11 @@ func newDuplicateNameError(ptr jsontext.Pointer, quotedName []byte, offset int64
Err: jsontext.ErrDuplicateName, Err: jsontext.ErrDuplicateName,
} }
} }
// toUnexpectedEOF converts [io.EOF] to [io.ErrUnexpectedEOF].
//
// The comparison is by identity, so only the bare io.EOF sentinel is
// converted; nil, unrelated errors, and errors that merely wrap io.EOF
// are returned unchanged.
func toUnexpectedEOF(err error) error {
	switch err {
	case io.EOF:
		return io.ErrUnexpectedEOF
	default:
		return err
	}
}