From 925a3cdcd13472c8f78d51c9ce99a59e77d46eb4 Mon Sep 17 00:00:00 2001
From: Julien Cretel <jub0bsinthecloud@gmail.com>
Date: Tue, 2 Sep 2025 22:10:40 +0000
Subject: [PATCH] unicode/utf8: make DecodeRune{,InString} inlineable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change makes the fast path for ASCII characters inlineable in
DecodeRune and DecodeRuneInString and removes most instances of manual
inlining at call sites.

Here are some benchmark results (no change to allocations):

goos: darwin
goarch: amd64
pkg: unicode/utf8
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                             │     old      │                 new                  │
                             │    sec/op    │    sec/op     vs base                │
DecodeASCIIRune-8              2.4545n ± 2%   0.6253n ± 2%  -74.52% (p=0.000 n=20)
DecodeJapaneseRune-8            3.988n ± 1%    4.023n ± 1%   +0.86% (p=0.050 n=20)
DecodeASCIIRuneInString-8      2.4675n ± 1%   0.6264n ± 2%  -74.61% (p=0.000 n=20)
DecodeJapaneseRuneInString-8    3.992n ± 1%    4.001n ± 1%        ~ (p=0.625 n=20)
geomean                         3.134n         1.585n       -49.43%

Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should
be reverted to their idiomatic implementations.

Fixes #31666
Updates #48195

Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d
GitHub-Last-Rev: 2e352a0045027e059be79cdb60241b5cf35fec71
GitHub-Pull-Request: golang/go#75181
Reviewed-on: https://go-review.googlesource.com/c/go/+/699675
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
---
 src/bufio/bufio.go                        |  5 +--
 src/bytes/bytes.go                        | 40 +++++------------------
 src/bytes/iter.go                         |  6 +---
 src/cmd/compile/internal/test/inl_test.go |  2 ++
 src/encoding/json/decode.go               |  4 ---
 src/fmt/format.go                         |  5 +--
 src/fmt/print.go                          |  5 +--
 src/regexp/regexp.go                      | 28 +++-------------
 src/strconv/quote.go                      |  8 ++---
 src/strings/iter.go                       |  6 +---
 src/strings/reader.go                     |  4 ---
 src/strings/strings.go                    | 16 +++------
 src/unicode/utf8/utf8.go                  | 26 +++++++++++++++
 src/unicode/utf8/utf8_test.go             | 27 ++++++++++++---
 14 files changed, 74 insertions(+), 108 deletions(-)

diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go
index 5244ce2e0c..141a9a1a2a 100644
--- a/src/bufio/bufio.go
+++ b/src/bufio/bufio.go
@@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) {
 	if b.r == b.w {
 		return 0, 0, b.readErr()
 	}
-	r, size = rune(b.buf[b.r]), 1
-	if r >= utf8.RuneSelf {
-		r, size = utf8.DecodeRune(b.buf[b.r:b.w])
-	}
+	r, size = utf8.DecodeRune(b.buf[b.r:b.w])
 	b.r += size
 	b.lastByte = int(b.buf[b.r-1])
 	b.lastRuneSize = size
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index ce2e004910..9a7f4ee3c9 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
 	// more efficient, possibly due to cache effects.
 	start := -1 // valid span start if >= 0
 	for i := 0; i < len(s); {
-		size := 1
-		r := rune(s[i])
-		if r >= utf8.RuneSelf {
-			r, size = utf8.DecodeRune(s[i:])
-		}
+		r, size := utf8.DecodeRune(s[i:])
 		if f(r) {
 			if start >= 0 {
 				spans = append(spans, span{start, i})
@@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
 	// fine. It could also shrink but that falls out naturally.
 	b := make([]byte, 0, len(s))
 	for i := 0; i < len(s); {
-		wid := 1
-		r := rune(s[i])
-		if r >= utf8.RuneSelf {
-			r, wid = utf8.DecodeRune(s[i:])
-		}
+		r, wid := utf8.DecodeRune(s[i:])
 		r = mapping(r)
 		if r >= 0 {
 			b = utf8.AppendRune(b, r)
@@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
 func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
 	start := 0
 	for start < len(s) {
-		wid := 1
-		r := rune(s[start])
-		if r >= utf8.RuneSelf {
-			r, wid = utf8.DecodeRune(s[start:])
-		}
+		r, wid := utf8.DecodeRune(s[start:])
 		if f(r) == truth {
 			return start
 		}
@@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {
 
 func trimLeftUnicode(s []byte, cutset string) []byte {
 	for len(s) > 0 {
-		r, n := rune(s[0]), 1
-		if r >= utf8.RuneSelf {
-			r, n = utf8.DecodeRune(s)
-		}
+		r, n := utf8.DecodeRune(s)
 		if !containsRune(cutset, r) {
 			break
 		}
@@ -1251,19 +1236,10 @@ hasUnicode:
 	t = t[i:]
 	for len(s) != 0 && len(t) != 0 {
 		// Extract first rune from each.
-		var sr, tr rune
-		if s[0] < utf8.RuneSelf {
-			sr, s = rune(s[0]), s[1:]
-		} else {
-			r, size := utf8.DecodeRune(s)
-			sr, s = r, s[size:]
-		}
-		if t[0] < utf8.RuneSelf {
-			tr, t = rune(t[0]), t[1:]
-		} else {
-			r, size := utf8.DecodeRune(t)
-			tr, t = r, t[size:]
-		}
+		sr, size := utf8.DecodeRune(s)
+		s = s[size:]
+		tr, size := utf8.DecodeRune(t)
+		t = t[size:]
 
 		// If they match, keep going; if not, return false.
 
diff --git a/src/bytes/iter.go b/src/bytes/iter.go
index b2abb2c9ba..a4ece881d2 100644
--- a/src/bytes/iter.go
+++ b/src/bytes/iter.go
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
 	return func(yield func([]byte) bool) {
 		start := -1
 		for i := 0; i < len(s); {
-			size := 1
-			r := rune(s[i])
-			if r >= utf8.RuneSelf {
-				r, size = utf8.DecodeRune(s[i:])
-			}
+			r, size := utf8.DecodeRune(s[i:])
 			if f(r) {
 				if start >= 0 {
 					if !yield(s[start:i:i]) {
diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go
index eda6084b48..a49cd767db 100644
--- a/src/cmd/compile/internal/test/inl_test.go
+++ b/src/cmd/compile/internal/test/inl_test.go
@@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) {
 			"assemble64",
 		},
 		"unicode/utf8": {
+			"DecodeRune",
+			"DecodeRuneInString",
 			"FullRune",
 			"FullRuneInString",
 			"RuneLen",
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index 70885a517e..fc29296c0f 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) {
 		if c == '\\' || c == '"' || c < ' ' {
 			break
 		}
-		if c < utf8.RuneSelf {
-			r++
-			continue
-		}
 		rr, size := utf8.DecodeRune(s[r:])
 		if rr == utf8.RuneError && size == 1 {
 			break
diff --git a/src/fmt/format.go b/src/fmt/format.go
index 90e18cd696..334a94e298 100644
--- a/src/fmt/format.go
+++ b/src/fmt/format.go
@@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte {
 			if n < 0 {
 				return b[:i]
 			}
-			wid := 1
-			if b[i] >= utf8.RuneSelf {
-				_, wid = utf8.DecodeRune(b[i:])
-			}
+			_, wid := utf8.DecodeRune(b[i:])
 			i += wid
 		}
 	}
diff --git a/src/fmt/print.go b/src/fmt/print.go
index 155218046f..01cfa1a1c7 100644
--- a/src/fmt/print.go
+++ b/src/fmt/print.go
@@ -1145,10 +1145,7 @@ formatLoop:
 			break
 		}
 
-		verb, size := rune(format[i]), 1
-		if verb >= utf8.RuneSelf {
-			verb, size = utf8.DecodeRuneInString(format[i:])
-		}
+		verb, size := utf8.DecodeRuneInString(format[i:])
 		i += size
 
 		switch {
diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 253415fb6a..66c7369399 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -384,10 +384,6 @@ type inputString struct {
 
 func (i *inputString) step(pos int) (rune, int) {
 	if pos < len(i.str) {
-		c := i.str[pos]
-		if c < utf8.RuneSelf {
-			return rune(c), 1
-		}
 		return utf8.DecodeRuneInString(i.str[pos:])
 	}
 	return endOfText, 0
@@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag {
 	r1, r2 := endOfText, endOfText
 	// 0 < pos && pos <= len(i.str)
 	if uint(pos-1) < uint(len(i.str)) {
-		r1 = rune(i.str[pos-1])
-		if r1 >= utf8.RuneSelf {
-			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
-		}
+		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
 	}
 	// 0 <= pos && pos < len(i.str)
 	if uint(pos) < uint(len(i.str)) {
-		r2 = rune(i.str[pos])
-		if r2 >= utf8.RuneSelf {
-			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
-		}
+		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
 	}
 	return newLazyFlag(r1, r2)
 }
@@ -431,10 +421,6 @@ type inputBytes struct {
 
 func (i *inputBytes) step(pos int) (rune, int) {
 	if pos < len(i.str) {
-		c := i.str[pos]
-		if c < utf8.RuneSelf {
-			return rune(c), 1
-		}
 		return utf8.DecodeRune(i.str[pos:])
 	}
 	return endOfText, 0
@@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag {
 	r1, r2 := endOfText, endOfText
 	// 0 < pos && pos <= len(i.str)
 	if uint(pos-1) < uint(len(i.str)) {
-		r1 = rune(i.str[pos-1])
-		if r1 >= utf8.RuneSelf {
-			r1, _ = utf8.DecodeLastRune(i.str[:pos])
-		}
+		r1, _ = utf8.DecodeLastRune(i.str[:pos])
 	}
 	// 0 <= pos && pos < len(i.str)
 	if uint(pos) < uint(len(i.str)) {
-		r2 = rune(i.str[pos])
-		if r2 >= utf8.RuneSelf {
-			r2, _ = utf8.DecodeRune(i.str[pos:])
-		}
+		r2, _ = utf8.DecodeRune(i.str[pos:])
 	}
 	return newLazyFlag(r1, r2)
 }
diff --git a/src/strconv/quote.go b/src/strconv/quote.go
index 99c292a8ed..da2325647d 100644
--- a/src/strconv/quote.go
+++ b/src/strconv/quote.go
@@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
 		buf = nBuf
 	}
 	buf = append(buf, quote)
-	for width := 0; len(s) > 0; s = s[width:] {
-		r := rune(s[0])
-		width = 1
-		if r >= utf8.RuneSelf {
-			r, width = utf8.DecodeRuneInString(s)
-		}
+	for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
+		r, width = utf8.DecodeRuneInString(s)
 		if width == 1 && r == utf8.RuneError {
 			buf = append(buf, `\x`...)
 			buf = append(buf, lowerhex[s[0]>>4])
diff --git a/src/strings/iter.go b/src/strings/iter.go
index 69fe031739..84e763a834 100644
--- a/src/strings/iter.go
+++ b/src/strings/iter.go
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
 	return func(yield func(string) bool) {
 		start := -1
 		for i := 0; i < len(s); {
-			size := 1
-			r := rune(s[i])
-			if r >= utf8.RuneSelf {
-				r, size = utf8.DecodeRuneInString(s[i:])
-			}
+			r, size := utf8.DecodeRuneInString(s[i:])
 			if f(r) {
 				if start >= 0 {
 					if !yield(s[start:i]) {
diff --git a/src/strings/reader.go b/src/strings/reader.go
index 497ffb7a39..f12c9b18b3 100644
--- a/src/strings/reader.go
+++ b/src/strings/reader.go
@@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) {
 		return 0, 0, io.EOF
 	}
 	r.prevRune = int(r.i)
-	if c := r.s[r.i]; c < utf8.RuneSelf {
-		r.i++
-		return rune(c), 1, nil
-	}
 	ch, size = utf8.DecodeRuneInString(r.s[r.i:])
 	r.i += int64(size)
 	return
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 74007977d9..3cc3e79f98 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string {
 // Unicode code points c satisfying f(c) removed.
 func TrimRightFunc(s string, f func(rune) bool) string {
 	i := lastIndexFunc(s, f, false)
-	if i >= 0 && s[i] >= utf8.RuneSelf {
+	if i >= 0 {
 		_, wid := utf8.DecodeRuneInString(s[i:])
 		i += wid
 	} else {
@@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string {
 
 func trimLeftUnicode(s, cutset string) string {
 	for len(s) > 0 {
-		r, n := rune(s[0]), 1
-		if r >= utf8.RuneSelf {
-			r, n = utf8.DecodeRuneInString(s)
-		}
+		r, n := utf8.DecodeRuneInString(s)
 		if !ContainsRune(cutset, r) {
 			break
 		}
@@ -1224,13 +1221,8 @@ hasUnicode:
 		}
 
 		// Extract first rune from second string.
-		var tr rune
-		if t[0] < utf8.RuneSelf {
-			tr, t = rune(t[0]), t[1:]
-		} else {
-			r, size := utf8.DecodeRuneInString(t)
-			tr, t = r, t[size:]
-		}
+		tr, size := utf8.DecodeRuneInString(t)
+		t = t[size:]
 
 		// If they match, keep going; if not, return false.
 
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index 01cad1cc81..68283341d9 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -155,6 +155,20 @@ func FullRuneInString(s string) bool {
 // out of range, or is not the shortest possible UTF-8 encoding for the
 // value. No other validation is performed.
 func DecodeRune(p []byte) (r rune, size int) {
+	// Inlineable fast path for ASCII characters; see #48195.
+	// The one-iteration range loop is deliberately unidiomatic so that the
+	// function stays inlineable; simplify it once #61502 is resolved.
+	for _, b := range p {
+		if b < RuneSelf {
+			return rune(b), 1
+		}
+		break
+	}
+	r, size = decodeRuneSlow(p)
+	return
+}
+
+func decodeRuneSlow(p []byte) (r rune, size int) {
 	n := len(p)
 	if n < 1 {
 		return RuneError, 0
@@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) {
 // out of range, or is not the shortest possible UTF-8 encoding for the
 // value. No other validation is performed.
 func DecodeRuneInString(s string) (r rune, size int) {
+	// Inlineable fast path for ASCII characters; see #48195.
+	// The if/else with a trailing bare return is deliberately unidiomatic so
+	// that the function stays inlineable; simplify once #61502 is resolved.
+	if s != "" && s[0] < RuneSelf {
+		return rune(s[0]), 1
+	} else {
+		r, size = decodeRuneInStringSlow(s)
+	}
+	return
+}
+
+func decodeRuneInStringSlow(s string) (rune, int) {
 	n := len(s)
 	if n < 1 {
 		return RuneError, 0
diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go
index aece0fab73..bf4f074ffd 100644
--- a/src/unicode/utf8/utf8_test.go
+++ b/src/unicode/utf8/utf8_test.go
@@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) {
 
 func BenchmarkDecodeASCIIRune(b *testing.B) {
 	a := []byte{'a'}
-	for i := 0; i < b.N; i++ {
-		DecodeRune(a)
+	for range b.N {
+		runeSink, sizeSink = DecodeRune(a)
 	}
 }
 
 func BenchmarkDecodeJapaneseRune(b *testing.B) {
 	nihon := []byte("本")
-	for i := 0; i < b.N; i++ {
-		DecodeRune(nihon)
+	for range b.N {
+		runeSink, sizeSink = DecodeRune(nihon)
 	}
 }
 
+func BenchmarkDecodeASCIIRuneInString(b *testing.B) {
+	a := "a"
+	for range b.N {
+		runeSink, sizeSink = DecodeRuneInString(a)
+	}
+}
+
+func BenchmarkDecodeJapaneseRuneInString(b *testing.B) {
+	nihon := "本"
+	for range b.N {
+		runeSink, sizeSink = DecodeRuneInString(nihon)
+	}
+}
+
+var (
+	runeSink rune
+	sizeSink int
+)
+
 // boolSink is used to reference the return value of benchmarked
 // functions to avoid dead code elimination.
 var boolSink bool