diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go index 5244ce2e0c..141a9a1a2a 100644 --- a/src/bufio/bufio.go +++ b/src/bufio/bufio.go @@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) { if b.r == b.w { return 0, 0, b.readErr() } - r, size = rune(b.buf[b.r]), 1 - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRune(b.buf[b.r:b.w]) - } + r, size = utf8.DecodeRune(b.buf[b.r:b.w]) b.r += size b.lastByte = int(b.buf[b.r-1]) b.lastRuneSize = size diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index ce2e004910..9a7f4ee3c9 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte { // more efficient, possibly due to cache effects. start := -1 // valid span start if >= 0 for i := 0; i < len(s); { - size := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRune(s[i:]) - } + r, size := utf8.DecodeRune(s[i:]) if f(r) { if start >= 0 { spans = append(spans, span{start, i}) @@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte { // fine. It could also shrink but that falls out naturally. b := make([]byte, 0, len(s)) for i := 0; i < len(s); { - wid := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[i:]) - } + r, wid := utf8.DecodeRune(s[i:]) r = mapping(r) if r >= 0 { b = utf8.AppendRune(b, r) @@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int { func indexFunc(s []byte, f func(r rune) bool, truth bool) int { start := 0 for start < len(s) { - wid := 1 - r := rune(s[start]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[start:]) - } + r, wid := utf8.DecodeRune(s[start:]) if f(r) == truth { return start } @@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte { func trimLeftUnicode(s []byte, cutset string) []byte { for len(s) > 0 { - r, n := rune(s[0]), 1 - if r >= utf8.RuneSelf { - r, n = utf8.DecodeRune(s) - } + r, n := utf8.DecodeRune(s) if !containsRune(cutset, r) { break } @@ -1251,19 +1236,10 @@ hasUnicode: t = t[i:] for len(s) != 0 && len(t) != 0 { // Extract first rune from each. - var sr, tr rune - if s[0] < utf8.RuneSelf { - sr, s = rune(s[0]), s[1:] - } else { - r, size := utf8.DecodeRune(s) - sr, s = r, s[size:] - } - if t[0] < utf8.RuneSelf { - tr, t = rune(t[0]), t[1:] - } else { - r, size := utf8.DecodeRune(t) - tr, t = r, t[size:] - } + sr, size := utf8.DecodeRune(s) + s = s[size:] + tr, size := utf8.DecodeRune(t) + t = t[size:] // If they match, keep going; if not, return false. diff --git a/src/bytes/iter.go b/src/bytes/iter.go index b2abb2c9ba..a4ece881d2 100644 --- a/src/bytes/iter.go +++ b/src/bytes/iter.go @@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] { return func(yield func([]byte) bool) { start := -1 for i := 0; i < len(s); { - size := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRune(s[i:]) - } + r, size := utf8.DecodeRune(s[i:]) if f(r) { if start >= 0 { if !yield(s[start:i:i]) { diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go index eda6084b48..a49cd767db 100644 --- a/src/cmd/compile/internal/test/inl_test.go +++ b/src/cmd/compile/internal/test/inl_test.go @@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) { "assemble64", }, "unicode/utf8": { + "DecodeRune", + "DecodeRuneInString", "FullRune", "FullRuneInString", "RuneLen", diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index 70885a517e..fc29296c0f 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) { if c == '\\' || c == '"' || c < ' ' { break } - if c < utf8.RuneSelf { - r++ - continue - } rr, size := utf8.DecodeRune(s[r:]) if rr == utf8.RuneError && size == 1 { break diff --git a/src/fmt/format.go b/src/fmt/format.go index 90e18cd696..334a94e298 100644 --- a/src/fmt/format.go +++ b/src/fmt/format.go @@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte { if n < 0 { return b[:i] } - wid := 1 - if b[i] >= utf8.RuneSelf { - _, wid = utf8.DecodeRune(b[i:]) - } + _, wid := utf8.DecodeRune(b[i:]) i += wid } } diff --git a/src/fmt/print.go b/src/fmt/print.go index 155218046f..01cfa1a1c7 100644 --- a/src/fmt/print.go +++ b/src/fmt/print.go @@ -1145,10 +1145,7 @@ formatLoop: break } - verb, size := rune(format[i]), 1 - if verb >= utf8.RuneSelf { - verb, size = utf8.DecodeRuneInString(format[i:]) - } + verb, size := utf8.DecodeRuneInString(format[i:]) i += size switch { diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go index 253415fb6a..66c7369399 100644 --- a/src/regexp/regexp.go +++ b/src/regexp/regexp.go @@ -384,10 +384,6 @@ type inputString struct { func (i *inputString) step(pos int) (rune, int) { if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } return utf8.DecodeRuneInString(i.str[pos:]) } return endOfText, 0 @@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag { r1, r2 := endOfText, endOfText // 0 < pos && pos <= len(i.str) if uint(pos-1) < uint(len(i.str)) { - r1 = rune(i.str[pos-1]) - if r1 >= utf8.RuneSelf { - r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) - } + r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) } // 0 <= pos && pos < len(i.str) if uint(pos) < uint(len(i.str)) { - r2 = rune(i.str[pos]) - if r2 >= utf8.RuneSelf { - r2, _ = utf8.DecodeRuneInString(i.str[pos:]) - } + r2, _ = utf8.DecodeRuneInString(i.str[pos:]) } return newLazyFlag(r1, r2) } @@ -431,10 +421,6 @@ type inputBytes struct { func (i *inputBytes) step(pos int) (rune, int) { if pos < len(i.str) { - c := i.str[pos] - if c < utf8.RuneSelf { - return rune(c), 1 - } return utf8.DecodeRune(i.str[pos:]) } return endOfText, 0 @@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag { r1, r2 := endOfText, endOfText // 0 < pos && pos <= len(i.str) if uint(pos-1) < uint(len(i.str)) { - r1 = rune(i.str[pos-1]) - if r1 >= utf8.RuneSelf { - r1, _ = utf8.DecodeLastRune(i.str[:pos]) - } + r1, _ = utf8.DecodeLastRune(i.str[:pos]) } // 0 <= pos && pos < len(i.str) if uint(pos) < uint(len(i.str)) { - r2 = rune(i.str[pos]) - if r2 >= utf8.RuneSelf { - r2, _ = utf8.DecodeRune(i.str[pos:]) - } + r2, _ = utf8.DecodeRune(i.str[pos:]) } return newLazyFlag(r1, r2) } diff --git a/src/strconv/quote.go b/src/strconv/quote.go index 99c292a8ed..da2325647d 100644 --- a/src/strconv/quote.go +++ b/src/strconv/quote.go @@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b buf = nBuf } buf = append(buf, quote) - for width := 0; len(s) > 0; s = s[width:] { - r := rune(s[0]) - width = 1 - if r >= utf8.RuneSelf { - r, width = utf8.DecodeRuneInString(s) - } + for r, width := rune(0), 0; len(s) > 0; s = s[width:] { + r, width = utf8.DecodeRuneInString(s) if width == 1 && r == utf8.RuneError { buf = append(buf, `\x`...) buf = append(buf, lowerhex[s[0]>>4]) diff --git a/src/strings/iter.go b/src/strings/iter.go index 69fe031739..84e763a834 100644 --- a/src/strings/iter.go +++ b/src/strings/iter.go @@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] { return func(yield func(string) bool) { start := -1 for i := 0; i < len(s); { - size := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRuneInString(s[i:]) - } + r, size := utf8.DecodeRuneInString(s[i:]) if f(r) { if start >= 0 { if !yield(s[start:i]) { diff --git a/src/strings/reader.go b/src/strings/reader.go index 497ffb7a39..f12c9b18b3 100644 --- a/src/strings/reader.go +++ b/src/strings/reader.go @@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) { return 0, 0, io.EOF } r.prevRune = int(r.i) - if c := r.s[r.i]; c < utf8.RuneSelf { - r.i++ - return rune(c), 1, nil - } ch, size = utf8.DecodeRuneInString(r.s[r.i:]) r.i += int64(size) return diff --git a/src/strings/strings.go b/src/strings/strings.go index 74007977d9..3cc3e79f98 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string { // Unicode code points c satisfying f(c) removed. func TrimRightFunc(s string, f func(rune) bool) string { i := lastIndexFunc(s, f, false) - if i >= 0 && s[i] >= utf8.RuneSelf { + if i >= 0 { _, wid := utf8.DecodeRuneInString(s[i:]) i += wid } else { @@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string { func trimLeftUnicode(s, cutset string) string { for len(s) > 0 { - r, n := rune(s[0]), 1 - if r >= utf8.RuneSelf { - r, n = utf8.DecodeRuneInString(s) - } + r, n := utf8.DecodeRuneInString(s) if !ContainsRune(cutset, r) { break } @@ -1224,13 +1221,8 @@ hasUnicode: } // Extract first rune from second string. - var tr rune - if t[0] < utf8.RuneSelf { - tr, t = rune(t[0]), t[1:] - } else { - r, size := utf8.DecodeRuneInString(t) - tr, t = r, t[size:] - } + tr, size := utf8.DecodeRuneInString(t) + t = t[size:] // If they match, keep going; if not, return false. diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 01cad1cc81..68283341d9 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -155,6 +155,20 @@ func FullRuneInString(s string) bool { // out of range, or is not the shortest possible UTF-8 encoding for the // value. No other validation is performed. func DecodeRune(p []byte) (r rune, size int) { + // Inlineable fast path for ASCII characters; see #48195. + // This implementation is weird but effective at rendering the + // function inlineable. + for _, b := range p { + if b < RuneSelf { + return rune(b), 1 + } + break + } + r, size = decodeRuneSlow(p) + return +} + +func decodeRuneSlow(p []byte) (r rune, size int) { n := len(p) if n < 1 { return RuneError, 0 @@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) { // out of range, or is not the shortest possible UTF-8 encoding for the // value. No other validation is performed. func DecodeRuneInString(s string) (r rune, size int) { + // Inlineable fast path for ASCII characters; see #48195. + // This implementation is a bit weird but effective at rendering the + // function inlineable. + if s != "" && s[0] < RuneSelf { + return rune(s[0]), 1 + } else { + r, size = decodeRuneInStringSlow(s) + } + return +} + +func decodeRuneInStringSlow(s string) (rune, int) { n := len(s) if n < 1 { return RuneError, 0 diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go index aece0fab73..bf4f074ffd 100644 --- a/src/unicode/utf8/utf8_test.go +++ b/src/unicode/utf8/utf8_test.go @@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) { func BenchmarkDecodeASCIIRune(b *testing.B) { a := []byte{'a'} - for i := 0; i < b.N; i++ { - DecodeRune(a) + for range b.N { + runeSink, sizeSink = DecodeRune(a) } } func BenchmarkDecodeJapaneseRune(b *testing.B) { nihon := []byte("本") - for i := 0; i < b.N; i++ { - DecodeRune(nihon) + for range b.N { + runeSink, sizeSink = DecodeRune(nihon) } } +func BenchmarkDecodeASCIIRuneInString(b *testing.B) { + a := "a" + for range b.N { + runeSink, sizeSink = DecodeRuneInString(a) + } +} + +func BenchmarkDecodeJapaneseRuneInString(b *testing.B) { + nihon := "本" + for range b.N { + runeSink, sizeSink = DecodeRuneInString(nihon) + } +} + +var ( + runeSink rune + sizeSink int +) + // boolSink is used to reference the return value of benchmarked // functions to avoid dead code elimination. var boolSink bool