unicode/utf8: make DecodeRune{,InString} inlineable

This change makes the fast path for ASCII characters inlineable in
DecodeRune and DecodeRuneInString and removes most instances of manual
inlining at call sites.

Here are some benchmark results (no change to allocations):

goos: darwin
goarch: amd64
pkg: unicode/utf8
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                             │     old      │                 new                  │
                             │    sec/op    │    sec/op     vs base                │
DecodeASCIIRune-8              2.4545n ± 2%   0.6253n ± 2%  -74.52% (p=0.000 n=20)
DecodeJapaneseRune-8            3.988n ± 1%    4.023n ± 1%   +0.86% (p=0.050 n=20)
DecodeASCIIRuneInString-8      2.4675n ± 1%   0.6264n ± 2%  -74.61% (p=0.000 n=20)
DecodeJapaneseRuneInString-8    3.992n ± 1%    4.001n ± 1%        ~ (p=0.625 n=20)
geomean                         3.134n         1.585n       -49.43%

Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should
be reverted to their idiomatic implementations.

Fixes #31666
Updates #48195

Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d
GitHub-Last-Rev: 2e352a0045
GitHub-Pull-Request: golang/go#75181
Reviewed-on: https://go-review.googlesource.com/c/go/+/699675
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Julien Cretel 2025-09-02 22:10:40 +00:00 committed by t hepudds
parent 3e596d448f
commit 925a3cdcd1
14 changed files with 74 additions and 108 deletions

View file

@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) {
if b.r == b.w { if b.r == b.w {
return 0, 0, b.readErr() return 0, 0, b.readErr()
} }
r, size = rune(b.buf[b.r]), 1
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(b.buf[b.r:b.w]) r, size = utf8.DecodeRune(b.buf[b.r:b.w])
}
b.r += size b.r += size
b.lastByte = int(b.buf[b.r-1]) b.lastByte = int(b.buf[b.r-1])
b.lastRuneSize = size b.lastRuneSize = size

View file

@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
// more efficient, possibly due to cache effects. // more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0 start := -1 // valid span start if >= 0
for i := 0; i < len(s); { for i := 0; i < len(s); {
size := 1 r, size := utf8.DecodeRune(s[i:])
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
if f(r) { if f(r) {
if start >= 0 { if start >= 0 {
spans = append(spans, span{start, i}) spans = append(spans, span{start, i})
@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
// fine. It could also shrink but that falls out naturally. // fine. It could also shrink but that falls out naturally.
b := make([]byte, 0, len(s)) b := make([]byte, 0, len(s))
for i := 0; i < len(s); { for i := 0; i < len(s); {
wid := 1 r, wid := utf8.DecodeRune(s[i:])
r := rune(s[i])
if r >= utf8.RuneSelf {
r, wid = utf8.DecodeRune(s[i:])
}
r = mapping(r) r = mapping(r)
if r >= 0 { if r >= 0 {
b = utf8.AppendRune(b, r) b = utf8.AppendRune(b, r)
@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
func indexFunc(s []byte, f func(r rune) bool, truth bool) int { func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
start := 0 start := 0
for start < len(s) { for start < len(s) {
wid := 1 r, wid := utf8.DecodeRune(s[start:])
r := rune(s[start])
if r >= utf8.RuneSelf {
r, wid = utf8.DecodeRune(s[start:])
}
if f(r) == truth { if f(r) == truth {
return start return start
} }
@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {
func trimLeftUnicode(s []byte, cutset string) []byte { func trimLeftUnicode(s []byte, cutset string) []byte {
for len(s) > 0 { for len(s) > 0 {
r, n := rune(s[0]), 1 r, n := utf8.DecodeRune(s)
if r >= utf8.RuneSelf {
r, n = utf8.DecodeRune(s)
}
if !containsRune(cutset, r) { if !containsRune(cutset, r) {
break break
} }
@ -1251,19 +1236,10 @@ hasUnicode:
t = t[i:] t = t[i:]
for len(s) != 0 && len(t) != 0 { for len(s) != 0 && len(t) != 0 {
// Extract first rune from each. // Extract first rune from each.
var sr, tr rune sr, size := utf8.DecodeRune(s)
if s[0] < utf8.RuneSelf { s = s[size:]
sr, s = rune(s[0]), s[1:] tr, size := utf8.DecodeRune(t)
} else { t = t[size:]
r, size := utf8.DecodeRune(s)
sr, s = r, s[size:]
}
if t[0] < utf8.RuneSelf {
tr, t = rune(t[0]), t[1:]
} else {
r, size := utf8.DecodeRune(t)
tr, t = r, t[size:]
}
// If they match, keep going; if not, return false. // If they match, keep going; if not, return false.

View file

@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
return func(yield func([]byte) bool) { return func(yield func([]byte) bool) {
start := -1 start := -1
for i := 0; i < len(s); { for i := 0; i < len(s); {
size := 1 r, size := utf8.DecodeRune(s[i:])
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
if f(r) { if f(r) {
if start >= 0 { if start >= 0 {
if !yield(s[start:i:i]) { if !yield(s[start:i:i]) {

View file

@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) {
"assemble64", "assemble64",
}, },
"unicode/utf8": { "unicode/utf8": {
"DecodeRune",
"DecodeRuneInString",
"FullRune", "FullRune",
"FullRuneInString", "FullRuneInString",
"RuneLen", "RuneLen",

View file

@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) {
if c == '\\' || c == '"' || c < ' ' { if c == '\\' || c == '"' || c < ' ' {
break break
} }
if c < utf8.RuneSelf {
r++
continue
}
rr, size := utf8.DecodeRune(s[r:]) rr, size := utf8.DecodeRune(s[r:])
if rr == utf8.RuneError && size == 1 { if rr == utf8.RuneError && size == 1 {
break break

View file

@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte {
if n < 0 { if n < 0 {
return b[:i] return b[:i]
} }
wid := 1 _, wid := utf8.DecodeRune(b[i:])
if b[i] >= utf8.RuneSelf {
_, wid = utf8.DecodeRune(b[i:])
}
i += wid i += wid
} }
} }

View file

@ -1145,10 +1145,7 @@ formatLoop:
break break
} }
verb, size := rune(format[i]), 1 verb, size := utf8.DecodeRuneInString(format[i:])
if verb >= utf8.RuneSelf {
verb, size = utf8.DecodeRuneInString(format[i:])
}
i += size i += size
switch { switch {

View file

@ -384,10 +384,6 @@ type inputString struct {
func (i *inputString) step(pos int) (rune, int) { func (i *inputString) step(pos int) (rune, int) {
if pos < len(i.str) { if pos < len(i.str) {
c := i.str[pos]
if c < utf8.RuneSelf {
return rune(c), 1
}
return utf8.DecodeRuneInString(i.str[pos:]) return utf8.DecodeRuneInString(i.str[pos:])
} }
return endOfText, 0 return endOfText, 0
@ -409,18 +405,12 @@ func (i *inputString) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str) // 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) { if uint(pos-1) < uint(len(i.str)) {
r1 = rune(i.str[pos-1])
if r1 >= utf8.RuneSelf {
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
} }
}
// 0 <= pos && pos < len(i.str) // 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) { if uint(pos) < uint(len(i.str)) {
r2 = rune(i.str[pos])
if r2 >= utf8.RuneSelf {
r2, _ = utf8.DecodeRuneInString(i.str[pos:]) r2, _ = utf8.DecodeRuneInString(i.str[pos:])
} }
}
return newLazyFlag(r1, r2) return newLazyFlag(r1, r2)
} }
@ -431,10 +421,6 @@ type inputBytes struct {
func (i *inputBytes) step(pos int) (rune, int) { func (i *inputBytes) step(pos int) (rune, int) {
if pos < len(i.str) { if pos < len(i.str) {
c := i.str[pos]
if c < utf8.RuneSelf {
return rune(c), 1
}
return utf8.DecodeRune(i.str[pos:]) return utf8.DecodeRune(i.str[pos:])
} }
return endOfText, 0 return endOfText, 0
@ -456,18 +442,12 @@ func (i *inputBytes) context(pos int) lazyFlag {
r1, r2 := endOfText, endOfText r1, r2 := endOfText, endOfText
// 0 < pos && pos <= len(i.str) // 0 < pos && pos <= len(i.str)
if uint(pos-1) < uint(len(i.str)) { if uint(pos-1) < uint(len(i.str)) {
r1 = rune(i.str[pos-1])
if r1 >= utf8.RuneSelf {
r1, _ = utf8.DecodeLastRune(i.str[:pos]) r1, _ = utf8.DecodeLastRune(i.str[:pos])
} }
}
// 0 <= pos && pos < len(i.str) // 0 <= pos && pos < len(i.str)
if uint(pos) < uint(len(i.str)) { if uint(pos) < uint(len(i.str)) {
r2 = rune(i.str[pos])
if r2 >= utf8.RuneSelf {
r2, _ = utf8.DecodeRune(i.str[pos:]) r2, _ = utf8.DecodeRune(i.str[pos:])
} }
}
return newLazyFlag(r1, r2) return newLazyFlag(r1, r2)
} }

View file

@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
buf = nBuf buf = nBuf
} }
buf = append(buf, quote) buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] { for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
width = 1
if r >= utf8.RuneSelf {
r, width = utf8.DecodeRuneInString(s) r, width = utf8.DecodeRuneInString(s)
}
if width == 1 && r == utf8.RuneError { if width == 1 && r == utf8.RuneError {
buf = append(buf, `\x`...) buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4]) buf = append(buf, lowerhex[s[0]>>4])

View file

@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
return func(yield func(string) bool) { return func(yield func(string) bool) {
start := -1 start := -1
for i := 0; i < len(s); { for i := 0; i < len(s); {
size := 1 r, size := utf8.DecodeRuneInString(s[i:])
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRuneInString(s[i:])
}
if f(r) { if f(r) {
if start >= 0 { if start >= 0 {
if !yield(s[start:i]) { if !yield(s[start:i]) {

View file

@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) {
return 0, 0, io.EOF return 0, 0, io.EOF
} }
r.prevRune = int(r.i) r.prevRune = int(r.i)
if c := r.s[r.i]; c < utf8.RuneSelf {
r.i++
return rune(c), 1, nil
}
ch, size = utf8.DecodeRuneInString(r.s[r.i:]) ch, size = utf8.DecodeRuneInString(r.s[r.i:])
r.i += int64(size) r.i += int64(size)
return return

View file

@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string {
// Unicode code points c satisfying f(c) removed. // Unicode code points c satisfying f(c) removed.
func TrimRightFunc(s string, f func(rune) bool) string { func TrimRightFunc(s string, f func(rune) bool) string {
i := lastIndexFunc(s, f, false) i := lastIndexFunc(s, f, false)
if i >= 0 && s[i] >= utf8.RuneSelf { if i >= 0 {
_, wid := utf8.DecodeRuneInString(s[i:]) _, wid := utf8.DecodeRuneInString(s[i:])
i += wid i += wid
} else { } else {
@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string {
func trimLeftUnicode(s, cutset string) string { func trimLeftUnicode(s, cutset string) string {
for len(s) > 0 { for len(s) > 0 {
r, n := rune(s[0]), 1 r, n := utf8.DecodeRuneInString(s)
if r >= utf8.RuneSelf {
r, n = utf8.DecodeRuneInString(s)
}
if !ContainsRune(cutset, r) { if !ContainsRune(cutset, r) {
break break
} }
@ -1224,13 +1221,8 @@ hasUnicode:
} }
// Extract first rune from second string. // Extract first rune from second string.
var tr rune tr, size := utf8.DecodeRuneInString(t)
if t[0] < utf8.RuneSelf { t = t[size:]
tr, t = rune(t[0]), t[1:]
} else {
r, size := utf8.DecodeRuneInString(t)
tr, t = r, t[size:]
}
// If they match, keep going; if not, return false. // If they match, keep going; if not, return false.

View file

@ -155,6 +155,20 @@ func FullRuneInString(s string) bool {
// out of range, or is not the shortest possible UTF-8 encoding for the // out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed. // value. No other validation is performed.
func DecodeRune(p []byte) (r rune, size int) { func DecodeRune(p []byte) (r rune, size int) {
// Inlineable fast path for ASCII characters; see #48195.
// This implementation is weird but effective at rendering the
// function inlineable.
for _, b := range p {
if b < RuneSelf {
return rune(b), 1
}
break
}
r, size = decodeRuneSlow(p)
return
}
func decodeRuneSlow(p []byte) (r rune, size int) {
n := len(p) n := len(p)
if n < 1 { if n < 1 {
return RuneError, 0 return RuneError, 0
@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) {
// out of range, or is not the shortest possible UTF-8 encoding for the // out of range, or is not the shortest possible UTF-8 encoding for the
// value. No other validation is performed. // value. No other validation is performed.
func DecodeRuneInString(s string) (r rune, size int) { func DecodeRuneInString(s string) (r rune, size int) {
// Inlineable fast path for ASCII characters; see #48195.
// This implementation is a bit weird but effective at rendering the
// function inlineable.
if s != "" && s[0] < RuneSelf {
return rune(s[0]), 1
} else {
r, size = decodeRuneInStringSlow(s)
}
return
}
func decodeRuneInStringSlow(s string) (rune, int) {
n := len(s) n := len(s)
if n < 1 { if n < 1 {
return RuneError, 0 return RuneError, 0

View file

@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) {
func BenchmarkDecodeASCIIRune(b *testing.B) { func BenchmarkDecodeASCIIRune(b *testing.B) {
a := []byte{'a'} a := []byte{'a'}
for i := 0; i < b.N; i++ { for range b.N {
DecodeRune(a) runeSink, sizeSink = DecodeRune(a)
} }
} }
func BenchmarkDecodeJapaneseRune(b *testing.B) { func BenchmarkDecodeJapaneseRune(b *testing.B) {
nihon := []byte("本") nihon := []byte("本")
for i := 0; i < b.N; i++ { for range b.N {
DecodeRune(nihon) runeSink, sizeSink = DecodeRune(nihon)
} }
} }
func BenchmarkDecodeASCIIRuneInString(b *testing.B) {
a := "a"
for range b.N {
runeSink, sizeSink = DecodeRuneInString(a)
}
}
func BenchmarkDecodeJapaneseRuneInString(b *testing.B) {
nihon := "本"
for range b.N {
runeSink, sizeSink = DecodeRuneInString(nihon)
}
}
var (
runeSink rune
sizeSink int
)
// boolSink is used to reference the return value of benchmarked // boolSink is used to reference the return value of benchmarked
// functions to avoid dead code elimination. // functions to avoid dead code elimination.
var boolSink bool var boolSink bool