mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
unicode/utf8: add AppendRune
AppendRune appends the UTF-8 encoding of a rune to a []byte.
It is a generally more user friendly than EncodeRune.
EncodeASCIIRune-4 2.35ns ± 2%
EncodeJapaneseRune-4 4.60ns ± 2%
AppendASCIIRune-4 0.30ns ± 3%
AppendJapaneseRune-4 4.70ns ± 2%
The ASCII case is written to be inlineable.
Fixes #47609
Change-Id: If4f71eedffd2bd4ef0d7f960cb55b41c637eec54
Reviewed-on: https://go-review.googlesource.com/c/go/+/345571
Trust: Joe Tsai <joetsai@digital-static.net>
Reviewed-by: Rob Pike <r@golang.org>
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
parent
ef4cb2f776
commit
f371b30f32
3 changed files with 52 additions and 0 deletions
|
|
@ -122,6 +122,7 @@ func TestIntendedInlining(t *testing.T) {
|
||||||
"FullRune",
|
"FullRune",
|
||||||
"FullRuneInString",
|
"FullRuneInString",
|
||||||
"RuneLen",
|
"RuneLen",
|
||||||
|
"AppendRune",
|
||||||
"ValidRune",
|
"ValidRune",
|
||||||
},
|
},
|
||||||
"reflect": {
|
"reflect": {
|
||||||
|
|
|
||||||
|
|
@ -369,6 +369,32 @@ func EncodeRune(p []byte, r rune) int {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AppendRune appends the UTF-8 encoding of r to the end of p and
|
||||||
|
// returns the extended buffer. If the rune is out of range,
|
||||||
|
// it appends the encoding of RuneError.
|
||||||
|
func AppendRune(p []byte, r rune) []byte {
|
||||||
|
// This function is inlineable for fast handling of ASCII.
|
||||||
|
if uint32(r) <= rune1Max {
|
||||||
|
return append(p, byte(r))
|
||||||
|
}
|
||||||
|
return appendRuneNonASCII(p, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func appendRuneNonASCII(p []byte, r rune) []byte {
|
||||||
|
// Negative values are erroneous. Making it unsigned addresses the problem.
|
||||||
|
switch i := uint32(r); {
|
||||||
|
case i <= rune2Max:
|
||||||
|
return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
|
||||||
|
case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
|
||||||
|
r = RuneError
|
||||||
|
fallthrough
|
||||||
|
case i <= rune3Max:
|
||||||
|
return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
|
||||||
|
default:
|
||||||
|
return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RuneCount returns the number of runes in p. Erroneous and short
|
// RuneCount returns the number of runes in p. Erroneous and short
|
||||||
// encodings are treated as single runes of width 1 byte.
|
// encodings are treated as single runes of width 1 byte.
|
||||||
func RuneCount(p []byte) int {
|
func RuneCount(p []byte) int {
|
||||||
|
|
|
||||||
|
|
@ -127,6 +127,17 @@ func TestEncodeRune(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAppendRune(t *testing.T) {
|
||||||
|
for _, m := range utf8map {
|
||||||
|
if buf := AppendRune(nil, m.r); string(buf) != m.str {
|
||||||
|
t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, m.str)
|
||||||
|
}
|
||||||
|
if buf := AppendRune([]byte("init"), m.r); string(buf) != "init"+m.str {
|
||||||
|
t.Errorf("AppendRune(nil, %#04x) = %s, want %s", m.r, buf, "init"+m.str)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestDecodeRune(t *testing.T) {
|
func TestDecodeRune(t *testing.T) {
|
||||||
for _, m := range utf8map {
|
for _, m := range utf8map {
|
||||||
b := []byte(m.str)
|
b := []byte(m.str)
|
||||||
|
|
@ -583,6 +594,20 @@ func BenchmarkEncodeJapaneseRune(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkAppendASCIIRune(b *testing.B) {
|
||||||
|
buf := make([]byte, UTFMax)
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
AppendRune(buf[:0], 'a')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkAppendJapaneseRune(b *testing.B) {
|
||||||
|
buf := make([]byte, UTFMax)
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
AppendRune(buf[:0], '本')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkDecodeASCIIRune(b *testing.B) {
|
func BenchmarkDecodeASCIIRune(b *testing.B) {
|
||||||
a := []byte{'a'}
|
a := []byte{'a'}
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue