encoding/json: simplify folded name logic

The folded name logic (despite all attempts to optimize it)
was fundamentally an O(n) operation where every field in a struct
needed to be linearly scanned in order to find a match.
This made unmarshaling of unknown fields always O(n).
Instead of optimizing the comparison for each field,
make it such that we can look up a name in O(1).

We accomplish this by maintaining a map keyed by pre-folded names,
which we can pre-calculate when processing the struct type.
Using a stack-allocated buffer, we can fold the input name and
look up its presence in the map.

Also, instead of mapping from names to indexes,
map directly to a pointer to the field information.
The memory cost of this is the same and avoids an extra slice index.

The new logic is both simpler and faster.

Performance:

	name                   old time/op    new time/op    delta
	CodeDecoder           2.47ms ± 4%    2.42ms ± 2%  -1.83%  (p=0.022 n=10+9)
	UnicodeDecoder         259ns ± 2%     248ns ± 1%  -4.32%  (p=0.000 n=10+10)
	DecoderStream          150ns ± 1%     149ns ± 1%    ~     (p=0.516 n=10+10)
	CodeUnmarshal         3.13ms ± 2%    3.09ms ± 2%  -1.37%  (p=0.022 n=10+9)
	CodeUnmarshalReuse    2.50ms ± 1%    2.45ms ± 1%  -1.96%  (p=0.001 n=8+9)
	UnmarshalString       67.1ns ± 5%    64.5ns ± 5%  -3.90%  (p=0.005 n=10+10)
	UnmarshalFloat64      60.1ns ± 4%    58.4ns ± 2%  -2.89%  (p=0.002 n=10+8)
	UnmarshalInt64        51.0ns ± 4%    49.2ns ± 1%  -3.53%  (p=0.001 n=10+8)
	Issue10335            80.7ns ± 2%    79.2ns ± 1%  -1.82%  (p=0.016 n=10+8)
	Issue34127            28.6ns ± 3%    28.8ns ± 3%    ~     (p=0.388 n=9+10)
	Unmapped               177ns ± 2%     177ns ± 2%    ~     (p=0.956 n=10+10)

Change-Id: I478b2b958f5a63a69c9a991a39cd5ffb43244a2a
Reviewed-on: https://go-review.googlesource.com/c/go/+/471196
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
Auto-Submit: Joseph Tsai <joetsai@digital-static.net>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Johan Brandhorst-Satzkorn <johan.brandhorst@gmail.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
This commit is contained in:
Joe Tsai 2023-02-20 11:26:10 -08:00 committed by Gopher Robot
parent 2de406bb9e
commit b9b8cecbfc
4 changed files with 79 additions and 239 deletions

View file

@ -672,8 +672,9 @@ type structEncoder struct {
}
type structFields struct {
list []field
nameIndex map[string]int
list []field
byExactName map[string]*field
byFoldedName map[string]*field
}
func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
@ -1033,8 +1034,7 @@ func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool)
// A field represents a single field found in a struct.
type field struct {
name string
nameBytes []byte // []byte(name)
equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
nameBytes []byte // []byte(name)
nameNonEsc string // `"` + name + `":`
nameEscHTML string // `"` + HTMLEscape(name) + `":`
@ -1161,7 +1161,6 @@ func typeFields(t reflect.Type) structFields {
quoted: quoted,
}
field.nameBytes = []byte(field.name)
field.equalFold = foldFunc(field.nameBytes)
// Build nameEscHTML and nameNonEsc ahead of time.
nameEscBuf = appendHTMLEscape(nameEscBuf[:0], field.nameBytes)
@ -1240,11 +1239,16 @@ func typeFields(t reflect.Type) structFields {
f := &fields[i]
f.encoder = typeEncoder(typeByIndex(t, f.index))
}
nameIndex := make(map[string]int, len(fields))
exactNameIndex := make(map[string]*field, len(fields))
foldedNameIndex := make(map[string]*field, len(fields))
for i, field := range fields {
nameIndex[field.name] = i
exactNameIndex[field.name] = &fields[i]
// For historical reasons, first folded match takes precedence.
if _, ok := foldedNameIndex[string(foldName(field.nameBytes))]; !ok {
foldedNameIndex[string(foldName(field.nameBytes))] = &fields[i]
}
}
return structFields{fields, nameIndex}
return structFields{fields, exactNameIndex, foldedNameIndex}
}
// dominantField looks through the fields, all of which are known to