diff --git a/src/encoding/json/stream_test.go b/src/encoding/json/stream_test.go
index 9e5d48d39d2..0e937cfaa13 100644
--- a/src/encoding/json/stream_test.go
+++ b/src/encoding/json/stream_test.go
@@ -557,3 +557,67 @@ func TestTokenTruncation(t *testing.T) {
 		}
 	}
 }
+
+func TestDecoderInputOffset(t *testing.T) {
+	const input = ` [
+		[ ] , [ "one" ] , [ "one" , "two" ] ,
+		{ } , { "alpha" : "bravo" } , { "alpha" : "bravo" , "fizz" : "buzz" }
+	] `
+	wantOffsets := []int64{
+		0, 1, 2, 5, 6, 7, 8, 9, 12, 13, 18, 19, 20, 21, 24, 25, 30, 31,
+		38, 39, 40, 41, 46, 47, 48, 49, 52, 53, 60, 61, 70, 71, 72, 73,
+		76, 77, 84, 85, 94, 95, 103, 104, 112, 113, 114, 116, 117, 117,
+		117, 117,
+	}
+	wantMores := []bool{
+		true, true, false, true, true, false, true, true, true, false,
+		true, false, true, true, true, false, true, true, true, true,
+		true, false, false, false, false,
+	}
+
+	d := NewDecoder(strings.NewReader(input))
+	checkOffset := func() {
+		t.Helper()
+		got := d.InputOffset()
+		if len(wantOffsets) == 0 {
+			t.Fatalf("InputOffset = %d, want nil", got)
+		}
+		want := wantOffsets[0]
+		if got != want {
+			t.Fatalf("InputOffset = %d, want %d", got, want)
+		}
+		wantOffsets = wantOffsets[1:]
+	}
+	checkMore := func() {
+		t.Helper()
+		got := d.More()
+		if len(wantMores) == 0 {
+			t.Fatalf("More = %v, want nil", got)
+		}
+		want := wantMores[0]
+		if got != want {
+			t.Fatalf("More = %v, want %v", got, want)
+		}
+		wantMores = wantMores[1:]
+	}
+	checkOffset()
+	checkMore()
+	checkOffset()
+	for {
+		if _, err := d.Token(); err == io.EOF {
+			break
+		} else if err != nil {
+			t.Fatalf("Token error: %v", err)
+		}
+		checkOffset()
+		checkMore()
+		checkOffset()
+	}
+	checkOffset()
+	checkMore()
+	checkOffset()
+
+	if len(wantOffsets)+len(wantMores) > 0 {
+		t.Fatal("unconsumed testdata")
+	}
+}
diff --git a/src/encoding/json/v2_stream.go b/src/encoding/json/v2_stream.go
index 28e72c0a529..dcee553ee13 100644
--- a/src/encoding/json/v2_stream.go
+++ b/src/encoding/json/v2_stream.go
@@ -20,6 +20,10 @@ type Decoder struct {
 	dec  *jsontext.Decoder
 	opts jsonv2.Options
 	err  error
+
+	// hadPeeked reports whether [Decoder.More] was called.
+	// It is reset by [Decoder.Decode] and [Decoder.Token].
+	hadPeeked bool
 }
 
 // NewDecoder returns a new decoder that reads from r.
@@ -76,6 +80,7 @@ func (dec *Decoder) Decode(v any) error {
 		}
 		return dec.err
 	}
+	dec.hadPeeked = false
 	return jsonv2.Unmarshal(b, v, dec.opts)
 }
 
@@ -206,6 +211,7 @@ func (dec *Decoder) Token() (Token, error) {
 		}
 		return nil, transformSyntacticError(err)
 	}
+	dec.hadPeeked = false
 	switch k := tok.Kind(); k {
 	case 'n':
 		return nil, nil
@@ -230,6 +236,7 @@ func (dec *Decoder) Token() (Token, error) {
 // More reports whether there is another element in the
 // current array or object being parsed.
 func (dec *Decoder) More() bool {
+	dec.hadPeeked = true
 	k := dec.dec.PeekKind()
 	return k > 0 && k != ']' && k != '}'
 }
@@ -238,5 +245,18 @@ func (dec *Decoder) More() bool {
 // The offset gives the location of the end of the most recently returned token
 // and the beginning of the next token.
 func (dec *Decoder) InputOffset() int64 {
-	return dec.dec.InputOffset()
+	offset := dec.dec.InputOffset()
+	if dec.hadPeeked {
+		// Historically, InputOffset reported the location of
+		// the end of the most recently returned token
+		// unless [Decoder.More] is called, in which case, it reported
+		// the beginning of the next token.
+		unreadBuffer := dec.dec.UnreadBuffer()
+		trailingTokens := bytes.TrimLeft(unreadBuffer, " \n\r\t")
+		if len(trailingTokens) > 0 {
+			leadingWhitespace := len(unreadBuffer) - len(trailingTokens)
+			offset += int64(leadingWhitespace)
+		}
+	}
+	return offset
 }
diff --git a/src/encoding/json/v2_stream_test.go b/src/encoding/json/v2_stream_test.go
index b8951f82054..0885631fb59 100644
--- a/src/encoding/json/v2_stream_test.go
+++ b/src/encoding/json/v2_stream_test.go
@@ -537,3 +537,67 @@ func TestTokenTruncation(t *testing.T) {
 		}
 	}
 }
+
+func TestDecoderInputOffset(t *testing.T) {
+	const input = ` [
+		[ ] , [ "one" ] , [ "one" , "two" ] ,
+		{ } , { "alpha" : "bravo" } , { "alpha" : "bravo" , "fizz" : "buzz" }
+	] `
+	wantOffsets := []int64{
+		0, 1, 2, 5, 6, 7, 8, 9, 12, 13, 18, 19, 20, 21, 24, 25, 30, 31,
+		38, 39, 40, 41, 46, 47, 48, 49, 52, 53, 60, 61, 70, 71, 72, 73,
+		76, 77, 84, 85, 94, 95, 103, 104, 112, 113, 114, 116, 117, 117,
+		117, 117,
+	}
+	wantMores := []bool{
+		true, true, false, true, true, false, true, true, true, false,
+		true, false, true, true, true, false, true, true, true, true,
+		true, false, false, false, false,
+	}
+
+	d := NewDecoder(strings.NewReader(input))
+	checkOffset := func() {
+		t.Helper()
+		got := d.InputOffset()
+		if len(wantOffsets) == 0 {
+			t.Fatalf("InputOffset = %d, want nil", got)
+		}
+		want := wantOffsets[0]
+		if got != want {
+			t.Fatalf("InputOffset = %d, want %d", got, want)
+		}
+		wantOffsets = wantOffsets[1:]
+	}
+	checkMore := func() {
+		t.Helper()
+		got := d.More()
+		if len(wantMores) == 0 {
+			t.Fatalf("More = %v, want nil", got)
+		}
+		want := wantMores[0]
+		if got != want {
+			t.Fatalf("More = %v, want %v", got, want)
+		}
+		wantMores = wantMores[1:]
+	}
+	checkOffset()
+	checkMore()
+	checkOffset()
+	for {
+		if _, err := d.Token(); err == io.EOF {
+			break
+		} else if err != nil {
+			t.Fatalf("Token error: %v", err)
+		}
+		checkOffset()
+		checkMore()
+		checkOffset()
+	}
+	checkOffset()
+	checkMore()
+	checkOffset()
+
+	if len(wantOffsets)+len(wantMores) > 0 {
+		t.Fatal("unconsumed testdata")
+	}
+}
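
Illustrative note, not part of the patch above: the small program below is a sketch of the caller-visible contract that the hadPeeked handling preserves. Immediately after Token, Decoder.InputOffset reports the end of the token just returned; after a call to Decoder.More it reports the position of the next non-whitespace byte, matching the historical encoding/json behavior exercised by TestDecoderInputOffset. The program and its input string are hypothetical; only the json.Decoder methods shown (Token, More, InputOffset) come from the standard library.

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

func main() {
	// Hypothetical input with interior whitespace, similar in spirit to the test data.
	const input = ` [ "one" , "two" ] `
	d := json.NewDecoder(strings.NewReader(input))
	for {
		tok, err := d.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		end := d.InputOffset()  // offset of the end of the token just returned
		more := d.More()        // peek for another element in the current array or object
		next := d.InputOffset() // after More: offset of the next non-whitespace byte
		fmt.Printf("token=%v end=%d more=%v next=%d\n", tok, end, more, next)
	}
}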