go/src/encoding/json/stream_test.go
Daniel Martí b1a48af7e8 encoding/json: properly encode strings with ",string" again
golang.org/cl/193604 fixed one bug when one encodes a string with the
",string" option: if SetEscapeHTML(false) is used, we should not be
using HTML escaping for the inner string encoding. The CL correctly
fixed that.

The CL also tried to speed up this edge case. By avoiding an entire new
call to Marshal, the new Issue34127 benchmark reduced its time/op by
45%, and lowered the allocs/op from 3 to 2.

However, that last optimization wasn't correct:

	Since Go 1.2 every string can be marshaled to JSON without error
	even if it contains invalid UTF-8 byte sequences. Therefore
	there is no need to use Marshal again for the only reason of
	enclosing the string in double quotes.

JSON string encoding isn't just about adding quotes and taking care of
invalid UTF-8. We also need to escape some characters, like tabs and
newlines.

The new code failed to do that. The bug resulted in the added test case
failing to roundtrip properly; before our fix here, we'd see an error:

	invalid use of ,string struct tag, trying to unmarshal "\"\b\f\n\r\t\"\\\"" into string

If you pay close attention, you'll notice that the special characters
like tab and newline are only encoded once, not twice. When decoding
with the ",string" option, the outer string decode works, but the inner
string decode fails, as we are now decoding a JSON string with unescaped
special characters.

The fix we apply here isn't to go back to Marshal, as that would
re-introduce the bug with SetEscapeHTML(false). Instead, we can use a
new encode state from the pool - it results in minimal performance
impact, and even reduces allocs/op further. The performance impact seems
fair, given that we need to check the entire string for characters that
need to be escaped.

	name          old time/op    new time/op    delta
	Issue34127-8    89.7ns ± 2%   100.8ns ± 1%  +12.27%  (p=0.000 n=8+8)

	name          old alloc/op   new alloc/op   delta
	Issue34127-8     40.0B ± 0%     32.0B ± 0%  -20.00%  (p=0.000 n=8+8)

	name          old allocs/op  new allocs/op  delta
	Issue34127-8      2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=8+8)

Instead of adding another standalone test, we convert an existing
"string tag" test to be table-based, and add another test case there.

One test case from the original CL also had to be amended, due to the
same problem - when escaping '<' due to SetEscapeHTML(true), we need to
end up with double escaping, since we're using ",string".

Fixes #38173.

Change-Id: I2b0df9e4f1d3452fff74fe910e189c930dde4b5b
Reviewed-on: https://go-review.googlesource.com/c/go/+/226498
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
2020-05-08 13:29:11 +00:00

475 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"io"
"io/ioutil"
"log"
"net"
"net/http"
"net/http/httptest"
"reflect"
"strings"
"testing"
)
// Test values for the stream test.
// One of each JSON kind.
var streamTest = []interface{}{
0.1,
"hello",
nil,
true,
false,
[]interface{}{"a", "b", "c"},
map[string]interface{}{"": "Kelvin", "ß": "long s"},
3.14, // another value to make sure something can follow map
}
var streamEncoded = `0.1
"hello"
null
true
false
["a","b","c"]
{"ß":"long s","":"Kelvin"}
3.14
`
func TestEncoder(t *testing.T) {
for i := 0; i <= len(streamTest); i++ {
var buf bytes.Buffer
enc := NewEncoder(&buf)
// Check that enc.SetIndent("", "") turns off indentation.
enc.SetIndent(">", ".")
enc.SetIndent("", "")
for j, v := range streamTest[0:i] {
if err := enc.Encode(v); err != nil {
t.Fatalf("encode #%d: %v", j, err)
}
}
if have, want := buf.String(), nlines(streamEncoded, i); have != want {
t.Errorf("encoding %d items: mismatch", i)
diff(t, []byte(have), []byte(want))
break
}
}
}
var streamEncodedIndent = `0.1
"hello"
null
true
false
[
>."a",
>."b",
>."c"
>]
{
>."ß": "long s",
>."": "Kelvin"
>}
3.14
`
func TestEncoderIndent(t *testing.T) {
var buf bytes.Buffer
enc := NewEncoder(&buf)
enc.SetIndent(">", ".")
for _, v := range streamTest {
enc.Encode(v)
}
if have, want := buf.String(), streamEncodedIndent; have != want {
t.Error("indented encoding mismatch")
diff(t, []byte(have), []byte(want))
}
}
type strMarshaler string
func (s strMarshaler) MarshalJSON() ([]byte, error) {
return []byte(s), nil
}
type strPtrMarshaler string
func (s *strPtrMarshaler) MarshalJSON() ([]byte, error) {
return []byte(*s), nil
}
func TestEncoderSetEscapeHTML(t *testing.T) {
var c C
var ct CText
var tagStruct struct {
Valid int `json:"<>&#! "`
Invalid int `json:"\\"`
}
// This case is particularly interesting, as we force the encoder to
// take the address of the Ptr field to use its MarshalJSON method. This
// is why the '&' is important.
marshalerStruct := &struct {
NonPtr strMarshaler
Ptr strPtrMarshaler
}{`"<str>"`, `"<str>"`}
// https://golang.org/issue/34154
stringOption := struct {
Bar string `json:"bar,string"`
}{`<html>foobar</html>`}
for _, tt := range []struct {
name string
v interface{}
wantEscape string
want string
}{
{"c", c, `"\u003c\u0026\u003e"`, `"<&>"`},
{"ct", ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`},
{`"<&>"`, "<&>", `"\u003c\u0026\u003e"`, `"<&>"`},
{
"tagStruct", tagStruct,
`{"\u003c\u003e\u0026#! ":0,"Invalid":0}`,
`{"<>&#! ":0,"Invalid":0}`,
},
{
`"<str>"`, marshalerStruct,
`{"NonPtr":"\u003cstr\u003e","Ptr":"\u003cstr\u003e"}`,
`{"NonPtr":"<str>","Ptr":"<str>"}`,
},
{
"stringOption", stringOption,
`{"bar":"\"\\u003chtml\\u003efoobar\\u003c/html\\u003e\""}`,
`{"bar":"\"<html>foobar</html>\""}`,
},
} {
var buf bytes.Buffer
enc := NewEncoder(&buf)
if err := enc.Encode(tt.v); err != nil {
t.Errorf("Encode(%s): %s", tt.name, err)
continue
}
if got := strings.TrimSpace(buf.String()); got != tt.wantEscape {
t.Errorf("Encode(%s) = %#q, want %#q", tt.name, got, tt.wantEscape)
}
buf.Reset()
enc.SetEscapeHTML(false)
if err := enc.Encode(tt.v); err != nil {
t.Errorf("SetEscapeHTML(false) Encode(%s): %s", tt.name, err)
continue
}
if got := strings.TrimSpace(buf.String()); got != tt.want {
t.Errorf("SetEscapeHTML(false) Encode(%s) = %#q, want %#q",
tt.name, got, tt.want)
}
}
}
func TestDecoder(t *testing.T) {
for i := 0; i <= len(streamTest); i++ {
// Use stream without newlines as input,
// just to stress the decoder even more.
// Our test input does not include back-to-back numbers.
// Otherwise stripping the newlines would
// merge two adjacent JSON values.
var buf bytes.Buffer
for _, c := range nlines(streamEncoded, i) {
if c != '\n' {
buf.WriteRune(c)
}
}
out := make([]interface{}, i)
dec := NewDecoder(&buf)
for j := range out {
if err := dec.Decode(&out[j]); err != nil {
t.Fatalf("decode #%d/%d: %v", j, i, err)
}
}
if !reflect.DeepEqual(out, streamTest[0:i]) {
t.Errorf("decoding %d items: mismatch", i)
for j := range out {
if !reflect.DeepEqual(out[j], streamTest[j]) {
t.Errorf("#%d: have %v want %v", j, out[j], streamTest[j])
}
}
break
}
}
}
func TestDecoderBuffered(t *testing.T) {
r := strings.NewReader(`{"Name": "Gopher"} extra `)
var m struct {
Name string
}
d := NewDecoder(r)
err := d.Decode(&m)
if err != nil {
t.Fatal(err)
}
if m.Name != "Gopher" {
t.Errorf("Name = %q; want Gopher", m.Name)
}
rest, err := ioutil.ReadAll(d.Buffered())
if err != nil {
t.Fatal(err)
}
if g, w := string(rest), " extra "; g != w {
t.Errorf("Remaining = %q; want %q", g, w)
}
}
func nlines(s string, n int) string {
if n <= 0 {
return ""
}
for i, c := range s {
if c == '\n' {
if n--; n == 0 {
return s[0 : i+1]
}
}
}
return s
}
func TestRawMessage(t *testing.T) {
var data struct {
X float64
Id RawMessage
Y float32
}
const raw = `["\u0056",null]`
const msg = `{"X":0.1,"Id":["\u0056",null],"Y":0.2}`
err := Unmarshal([]byte(msg), &data)
if err != nil {
t.Fatalf("Unmarshal: %v", err)
}
if string([]byte(data.Id)) != raw {
t.Fatalf("Raw mismatch: have %#q want %#q", []byte(data.Id), raw)
}
b, err := Marshal(&data)
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if string(b) != msg {
t.Fatalf("Marshal: have %#q want %#q", b, msg)
}
}
func TestNullRawMessage(t *testing.T) {
var data struct {
X float64
Id RawMessage
IdPtr *RawMessage
Y float32
}
const msg = `{"X":0.1,"Id":null,"IdPtr":null,"Y":0.2}`
err := Unmarshal([]byte(msg), &data)
if err != nil {
t.Fatalf("Unmarshal: %v", err)
}
if want, got := "null", string(data.Id); want != got {
t.Fatalf("Raw mismatch: have %q, want %q", got, want)
}
if data.IdPtr != nil {
t.Fatalf("Raw pointer mismatch: have non-nil, want nil")
}
b, err := Marshal(&data)
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if string(b) != msg {
t.Fatalf("Marshal: have %#q want %#q", b, msg)
}
}
var blockingTests = []string{
`{"x": 1}`,
`[1, 2, 3]`,
}
func TestBlocking(t *testing.T) {
for _, enc := range blockingTests {
r, w := net.Pipe()
go w.Write([]byte(enc))
var val interface{}
// If Decode reads beyond what w.Write writes above,
// it will block, and the test will deadlock.
if err := NewDecoder(r).Decode(&val); err != nil {
t.Errorf("decoding %s: %v", enc, err)
}
r.Close()
w.Close()
}
}
func BenchmarkEncoderEncode(b *testing.B) {
b.ReportAllocs()
type T struct {
X, Y string
}
v := &T{"foo", "bar"}
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
if err := NewEncoder(ioutil.Discard).Encode(v); err != nil {
b.Fatal(err)
}
}
})
}
type tokenStreamCase struct {
json string
expTokens []interface{}
}
type decodeThis struct {
v interface{}
}
var tokenStreamCases = []tokenStreamCase{
// streaming token cases
{json: `10`, expTokens: []interface{}{float64(10)}},
{json: ` [10] `, expTokens: []interface{}{
Delim('['), float64(10), Delim(']')}},
{json: ` [false,10,"b"] `, expTokens: []interface{}{
Delim('['), false, float64(10), "b", Delim(']')}},
{json: `{ "a": 1 }`, expTokens: []interface{}{
Delim('{'), "a", float64(1), Delim('}')}},
{json: `{"a": 1, "b":"3"}`, expTokens: []interface{}{
Delim('{'), "a", float64(1), "b", "3", Delim('}')}},
{json: ` [{"a": 1},{"a": 2}] `, expTokens: []interface{}{
Delim('['),
Delim('{'), "a", float64(1), Delim('}'),
Delim('{'), "a", float64(2), Delim('}'),
Delim(']')}},
{json: `{"obj": {"a": 1}}`, expTokens: []interface{}{
Delim('{'), "obj", Delim('{'), "a", float64(1), Delim('}'),
Delim('}')}},
{json: `{"obj": [{"a": 1}]}`, expTokens: []interface{}{
Delim('{'), "obj", Delim('['),
Delim('{'), "a", float64(1), Delim('}'),
Delim(']'), Delim('}')}},
// streaming tokens with intermittent Decode()
{json: `{ "a": 1 }`, expTokens: []interface{}{
Delim('{'), "a",
decodeThis{float64(1)},
Delim('}')}},
{json: ` [ { "a" : 1 } ] `, expTokens: []interface{}{
Delim('['),
decodeThis{map[string]interface{}{"a": float64(1)}},
Delim(']')}},
{json: ` [{"a": 1},{"a": 2}] `, expTokens: []interface{}{
Delim('['),
decodeThis{map[string]interface{}{"a": float64(1)}},
decodeThis{map[string]interface{}{"a": float64(2)}},
Delim(']')}},
{json: `{ "obj" : [ { "a" : 1 } ] }`, expTokens: []interface{}{
Delim('{'), "obj", Delim('['),
decodeThis{map[string]interface{}{"a": float64(1)}},
Delim(']'), Delim('}')}},
{json: `{"obj": {"a": 1}}`, expTokens: []interface{}{
Delim('{'), "obj",
decodeThis{map[string]interface{}{"a": float64(1)}},
Delim('}')}},
{json: `{"obj": [{"a": 1}]}`, expTokens: []interface{}{
Delim('{'), "obj",
decodeThis{[]interface{}{
map[string]interface{}{"a": float64(1)},
}},
Delim('}')}},
{json: ` [{"a": 1} {"a": 2}] `, expTokens: []interface{}{
Delim('['),
decodeThis{map[string]interface{}{"a": float64(1)}},
decodeThis{&SyntaxError{"expected comma after array element", 11}},
}},
{json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []interface{}{
Delim('{'), strings.Repeat("a", 513),
decodeThis{&SyntaxError{"expected colon after object key", 518}},
}},
{json: `{ "\a" }`, expTokens: []interface{}{
Delim('{'),
&SyntaxError{"invalid character 'a' in string escape code", 3},
}},
{json: ` \a`, expTokens: []interface{}{
&SyntaxError{"invalid character '\\\\' looking for beginning of value", 1},
}},
}
func TestDecodeInStream(t *testing.T) {
for ci, tcase := range tokenStreamCases {
dec := NewDecoder(strings.NewReader(tcase.json))
for i, etk := range tcase.expTokens {
var tk interface{}
var err error
if dt, ok := etk.(decodeThis); ok {
etk = dt.v
err = dec.Decode(&tk)
} else {
tk, err = dec.Token()
}
if experr, ok := etk.(error); ok {
if err == nil || !reflect.DeepEqual(err, experr) {
t.Errorf("case %v: Expected error %#v in %q, but was %#v", ci, experr, tcase.json, err)
}
break
} else if err == io.EOF {
t.Errorf("case %v: Unexpected EOF in %q", ci, tcase.json)
break
} else if err != nil {
t.Errorf("case %v: Unexpected error '%#v' in %q", ci, err, tcase.json)
break
}
if !reflect.DeepEqual(tk, etk) {
t.Errorf(`case %v: %q @ %v expected %T(%v) was %T(%v)`, ci, tcase.json, i, etk, etk, tk, tk)
break
}
}
}
}
// Test from golang.org/issue/11893
func TestHTTPDecoding(t *testing.T) {
const raw = `{ "foo": "bar" }`
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(raw))
}))
defer ts.Close()
res, err := http.Get(ts.URL)
if err != nil {
log.Fatalf("GET failed: %v", err)
}
defer res.Body.Close()
foo := struct {
Foo string
}{}
d := NewDecoder(res.Body)
err = d.Decode(&foo)
if err != nil {
t.Fatalf("Decode: %v", err)
}
if foo.Foo != "bar" {
t.Errorf("decoded %q; want \"bar\"", foo.Foo)
}
// make sure we get the EOF the second time
err = d.Decode(&foo)
if err != io.EOF {
t.Errorf("err = %v; want io.EOF", err)
}
}