mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
encoding/json: coerce invalid UTF-8 to valid UTF-8 during Marshal
In practice, rejecting an entire structure due to a single invalid byte in a string is just too picky, and too hard to track down. Be consistent with the bulk of the standard library by converting invalid UTF-8 into UTF-8 with replacement runes.

R=golang-dev, crawshaw
CC=golang-dev
https://golang.org/cl/11211045
This commit is contained in:
parent
cfefe6a763
commit
64054a40ad
3 changed files with 17 additions and 11 deletions
|
|
@@ -17,6 +17,7 @@ crypto/sha1: Sum function to simplify hashing (CL 10571043).
|
||||||
crypto/sha256: Sum256 and Sum224 functions to simplify hashing (CL 10629043).
|
crypto/sha256: Sum256 and Sum224 functions to simplify hashing (CL 10629043).
|
||||||
crypto/sha512: Sum512 and Sum384 functions to simplify hashing (CL 10630043).
|
crypto/sha512: Sum512 and Sum384 functions to simplify hashing (CL 10630043).
|
||||||
crypto/tls: add support for TLS 1.1. (CL 7872043).
|
crypto/tls: add support for TLS 1.1. (CL 7872043).
|
||||||
|
encoding/json: accept but correct invalid UTF-8 in Marshal (CL 11211045).
|
||||||
flag: add Getter interface (CL 10472043).
|
flag: add Getter interface (CL 10472043).
|
||||||
fmt: indexed access to arguments in Printf etc. (CL 9680043).
|
fmt: indexed access to arguments in Printf etc. (CL 9680043).
|
||||||
go/build: support including C++ code with cgo (CL 8248043).
|
go/build: support including C++ code with cgo (CL 8248043).
|
||||||
|
|
|
||||||
|
|
@@ -393,15 +393,10 @@ func TestMarshal(t *testing.T) {
|
||||||
|
|
||||||
func TestMarshalBadUTF8(t *testing.T) {
|
func TestMarshalBadUTF8(t *testing.T) {
|
||||||
s := "hello\xffworld"
|
s := "hello\xffworld"
|
||||||
|
const enc = `"hello\ufffdworld"`
|
||||||
b, err := Marshal(s)
|
b, err := Marshal(s)
|
||||||
if err == nil {
|
if string(b) != enc || err != nil {
|
||||||
t.Fatal("Marshal bad UTF8: no error")
|
t.Errorf("Marshal(%q) = %#q, %v, want %#q, nil", s, b, err, enc)
|
||||||
}
|
|
||||||
if len(b) != 0 {
|
|
||||||
t.Fatal("Marshal returned data")
|
|
||||||
}
|
|
||||||
if _, ok := err.(*InvalidUTF8Error); !ok {
|
|
||||||
t.Fatalf("Marshal did not return InvalidUTF8Error: %T %v", err, err)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -209,8 +209,12 @@ func (e *UnsupportedValueError) Error() string {
|
||||||
return "json: unsupported value: " + e.Str
|
return "json: unsupported value: " + e.Str
|
||||||
}
|
}
|
||||||
|
|
||||||
// An InvalidUTF8Error is returned by Marshal when attempting
|
// Before Go 1.2, an InvalidUTF8Error was returned by Marshal when
|
||||||
// to encode a string value with invalid UTF-8 sequences.
|
// attempting to encode a string value with invalid UTF-8 sequences.
|
||||||
|
// As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by
|
||||||
|
// replacing invalid bytes with the Unicode replacement rune U+FFFD.
|
||||||
|
// This error is no longer generated but is kept for backwards compatibility
|
||||||
|
// with programs that might mention it.
|
||||||
type InvalidUTF8Error struct {
|
type InvalidUTF8Error struct {
|
||||||
S string // the whole string value that caused the error
|
S string // the whole string value that caused the error
|
||||||
}
|
}
|
||||||
|
|
@@ -555,7 +559,13 @@ func (e *encodeState) string(s string) (int, error) {
|
||||||
}
|
}
|
||||||
c, size := utf8.DecodeRuneInString(s[i:])
|
c, size := utf8.DecodeRuneInString(s[i:])
|
||||||
if c == utf8.RuneError && size == 1 {
|
if c == utf8.RuneError && size == 1 {
|
||||||
e.error(&InvalidUTF8Error{s})
|
if start < i {
|
||||||
|
e.WriteString(s[start:i])
|
||||||
|
}
|
||||||
|
e.WriteString(`\ufffd`)
|
||||||
|
i += size
|
||||||
|
start = i
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
// U+2028 is LINE SEPARATOR.
|
// U+2028 is LINE SEPARATOR.
|
||||||
// U+2029 is PARAGRAPH SEPARATOR.
|
// U+2029 is PARAGRAPH SEPARATOR.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue