From 633dd1d475e7346b43d87abc987a8c7f256e827d Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 15 Sep 2025 12:56:11 -0700 Subject: [PATCH] encoding/json: fix Decoder.InputOffset regression in goexperiment.jsonv2 The Decoder.InputOffset method was always ambiguous about the exact offset returned since anything between the end of the previous token to the start of the next token could be a valid result. Empirically, it seems that the behavior was to report the end of the previous token unless Decoder.More is called, in which case it reports the start of the next token. This is an odd semantic since a relatively side-effect free method like More is not quite so side-effect free. However, our goal is to preserve historical v1 semantic when possible regardless of whether it made sense. Note that jsontext.Decoder.InputOffset consistently always reports the end of the previous token. Users can explicitly choose the exact position they want by inspecting the UnreadBuffer. Fixes #75468 Change-Id: I1e946e83c9d29dfc09f2913ff8d6b2b80632f292 Reviewed-on: https://go-review.googlesource.com/c/go/+/703856 Reviewed-by: Damien Neil LUCI-TryBot-Result: Go LUCI Reviewed-by: Junyang Shao --- src/encoding/json/stream_test.go | 64 +++++++++++++++++++++++++++++ src/encoding/json/v2_stream.go | 22 +++++++++- src/encoding/json/v2_stream_test.go | 64 +++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/src/encoding/json/stream_test.go b/src/encoding/json/stream_test.go index 9e5d48d39d2..0e937cfaa13 100644 --- a/src/encoding/json/stream_test.go +++ b/src/encoding/json/stream_test.go @@ -557,3 +557,67 @@ func TestTokenTruncation(t *testing.T) { } } } + +func TestDecoderInputOffset(t *testing.T) { + const input = ` [ + [ ] , [ "one" ] , [ "one" , "two" ] , + { } , { "alpha" : "bravo" } , { "alpha" : "bravo" , "fizz" : "buzz" } + ] ` + wantOffsets := []int64{ + 0, 1, 2, 5, 6, 7, 8, 9, 12, 13, 18, 19, 20, 21, 24, 25, 30, 31, + 38, 39, 40, 41, 46, 47, 48, 49, 52, 53, 60, 61, 70, 71, 72, 73, + 76, 77, 84, 85, 94, 95, 103, 104, 112, 113, 114, 116, 117, 117, + 117, 117, + } + wantMores := []bool{ + true, true, false, true, true, false, true, true, true, false, + true, false, true, true, true, false, true, true, true, true, + true, false, false, false, false, + } + + d := NewDecoder(strings.NewReader(input)) + checkOffset := func() { + t.Helper() + got := d.InputOffset() + if len(wantOffsets) == 0 { + t.Fatalf("InputOffset = %d, want nil", got) + } + want := wantOffsets[0] + if got != want { + t.Fatalf("InputOffset = %d, want %d", got, want) + } + wantOffsets = wantOffsets[1:] + } + checkMore := func() { + t.Helper() + got := d.More() + if len(wantMores) == 0 { + t.Fatalf("More = %v, want nil", got) + } + want := wantMores[0] + if got != want { + t.Fatalf("More = %v, want %v", got, want) + } + wantMores = wantMores[1:] + } + checkOffset() + checkMore() + checkOffset() + for { + if _, err := d.Token(); err == io.EOF { + break + } else if err != nil { + t.Fatalf("Token error: %v", err) + } + checkOffset() + checkMore() + checkOffset() + } + checkOffset() + checkMore() + checkOffset() + + if len(wantOffsets)+len(wantMores) > 0 { + t.Fatal("unconsumed testdata") + } +} diff --git a/src/encoding/json/v2_stream.go b/src/encoding/json/v2_stream.go index 28e72c0a529..dcee553ee13 100644 --- a/src/encoding/json/v2_stream.go +++ b/src/encoding/json/v2_stream.go @@ -20,6 +20,10 @@ type Decoder struct { dec *jsontext.Decoder opts jsonv2.Options err error + + // hadPeeked reports whether [Decoder.More] was called. + // It is reset by [Decoder.Decode] and [Decoder.Token]. + hadPeeked bool } // NewDecoder returns a new decoder that reads from r. @@ -76,6 +80,7 @@ func (dec *Decoder) Decode(v any) error { } return dec.err } + dec.hadPeeked = false return jsonv2.Unmarshal(b, v, dec.opts) } @@ -206,6 +211,7 @@ func (dec *Decoder) Token() (Token, error) { } return nil, transformSyntacticError(err) } + dec.hadPeeked = false switch k := tok.Kind(); k { case 'n': return nil, nil @@ -230,6 +236,7 @@ func (dec *Decoder) Token() (Token, error) { // More reports whether there is another element in the // current array or object being parsed. func (dec *Decoder) More() bool { + dec.hadPeeked = true k := dec.dec.PeekKind() return k > 0 && k != ']' && k != '}' } @@ -238,5 +245,18 @@ func (dec *Decoder) More() bool { // The offset gives the location of the end of the most recently returned token // and the beginning of the next token. func (dec *Decoder) InputOffset() int64 { - return dec.dec.InputOffset() + offset := dec.dec.InputOffset() + if dec.hadPeeked { + // Historically, InputOffset reported the location of + // the end of the most recently returned token + // unless [Decoder.More] is called, in which case, it reported + // the beginning of the next token. + unreadBuffer := dec.dec.UnreadBuffer() + trailingTokens := bytes.TrimLeft(unreadBuffer, " \n\r\t") + if len(trailingTokens) > 0 { + leadingWhitespace := len(unreadBuffer) - len(trailingTokens) + offset += int64(leadingWhitespace) + } + } + return offset } diff --git a/src/encoding/json/v2_stream_test.go b/src/encoding/json/v2_stream_test.go index b8951f82054..0885631fb59 100644 --- a/src/encoding/json/v2_stream_test.go +++ b/src/encoding/json/v2_stream_test.go @@ -537,3 +537,67 @@ func TestTokenTruncation(t *testing.T) { } } } + +func TestDecoderInputOffset(t *testing.T) { + const input = ` [ + [ ] , [ "one" ] , [ "one" , "two" ] , + { } , { "alpha" : "bravo" } , { "alpha" : "bravo" , "fizz" : "buzz" } + ] ` + wantOffsets := []int64{ + 0, 1, 2, 5, 6, 7, 8, 9, 12, 13, 18, 19, 20, 21, 24, 25, 30, 31, + 38, 39, 40, 41, 46, 47, 48, 49, 52, 53, 60, 61, 70, 71, 72, 73, + 76, 77, 84, 85, 94, 95, 103, 104, 112, 113, 114, 116, 117, 117, + 117, 117, + } + wantMores := []bool{ + true, true, false, true, true, false, true, true, true, false, + true, false, true, true, true, false, true, true, true, true, + true, false, false, false, false, + } + + d := NewDecoder(strings.NewReader(input)) + checkOffset := func() { + t.Helper() + got := d.InputOffset() + if len(wantOffsets) == 0 { + t.Fatalf("InputOffset = %d, want nil", got) + } + want := wantOffsets[0] + if got != want { + t.Fatalf("InputOffset = %d, want %d", got, want) + } + wantOffsets = wantOffsets[1:] + } + checkMore := func() { + t.Helper() + got := d.More() + if len(wantMores) == 0 { + t.Fatalf("More = %v, want nil", got) + } + want := wantMores[0] + if got != want { + t.Fatalf("More = %v, want %v", got, want) + } + wantMores = wantMores[1:] + } + checkOffset() + checkMore() + checkOffset() + for { + if _, err := d.Token(); err == io.EOF { + break + } else if err != nil { + t.Fatalf("Token error: %v", err) + } + checkOffset() + checkMore() + checkOffset() + } + checkOffset() + checkMore() + checkOffset() + + if len(wantOffsets)+len(wantMores) > 0 { + t.Fatal("unconsumed testdata") + } +}