mirror of
https://github.com/golang/go.git
synced 2025-10-19 19:13:18 +00:00
bytes, strings: speed up TrimSpace
This change lifts bounds checks out of loops in the TrimSpace functions,
among other micro-optimizations. Here are some benchmark results
(no change to allocations):
goos: darwin
goarch: amd64
pkg: bytes
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                          │     old     │                 new                 │
                          │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8          4.406n ± 0%   3.829n ± 1%  -13.11% (p=0.000 n=20)
TrimSpace/ASCII-8           7.688n ± 1%   5.872n ± 1%  -23.61% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8    82.25n ± 1%   81.00n ± 1%   -1.51% (p=0.001 n=20)
TrimSpace/JustNonASCII-8    131.6n ± 8%   132.2n ± 1%        ~ (p=0.899 n=20)
geomean                     24.61n        22.15n        -9.99%
pkg: strings
                          │     old     │                 new                 │
                          │   sec/op    │   sec/op     vs base                │
TrimSpace/NoTrim-8          4.178n ± 0%   3.857n ± 2%   -7.68% (p=0.001 n=20)
TrimSpace/ASCII-8           7.708n ± 0%   5.585n ± 1%  -27.55% (p=0.000 n=20)
TrimSpace/SomeNonASCII-8    98.70n ± 1%   88.54n ± 1%  -10.30% (p=0.000 n=20)
TrimSpace/JustNonASCII-8    132.8n ± 2%   123.2n ± 0%   -7.16% (p=0.000 n=20)
geomean                     25.49n        22.02n       -13.61%
Change-Id: I523f03a909c82a51940b44c7b2634985b7447982
GitHub-Last-Rev: 35163f04c6
GitHub-Pull-Request: golang/go#75127
Reviewed-on: https://go-review.googlesource.com/c/go/+/698735
Reviewed-by: Sean Liao <sean@liao.dev>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Sean Liao <sean@liao.dev>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
parent
38204e0872
commit
89d41d254a
2 changed files with 44 additions and 56 deletions
|
@@ -1117,41 +1117,34 @@ func trimRightUnicode(s []byte, cutset string) []byte {
|
|||
// TrimSpace returns a subslice of s by slicing off all leading and
|
||||
// trailing white space, as defined by Unicode.
|
||||
func TrimSpace(s []byte) []byte {
|
||||
// Fast path for ASCII: look for the first ASCII non-space byte
|
||||
start := 0
|
||||
for ; start < len(s); start++ {
|
||||
c := s[start]
|
||||
// Fast path for ASCII: look for the first ASCII non-space byte.
|
||||
for lo, c := range s {
|
||||
if c >= utf8.RuneSelf {
|
||||
// If we run into a non-ASCII byte, fall back to the
|
||||
// slower unicode-aware method on the remaining bytes
|
||||
return TrimFunc(s[start:], unicode.IsSpace)
|
||||
// slower unicode-aware method on the remaining bytes.
|
||||
return TrimFunc(s[lo:], unicode.IsSpace)
|
||||
}
|
||||
if asciiSpace[c] == 0 {
|
||||
break
|
||||
if asciiSpace[c] != 0 {
|
||||
continue
|
||||
}
|
||||
s = s[lo:]
|
||||
// Now look for the first ASCII non-space byte from the end.
|
||||
for hi := len(s) - 1; hi >= 0; hi-- {
|
||||
c := s[hi]
|
||||
if c >= utf8.RuneSelf {
|
||||
return TrimFunc(s[:hi+1], unicode.IsSpace)
|
||||
}
|
||||
if asciiSpace[c] == 0 {
|
||||
// At this point, s[:hi+1] starts and ends with ASCII
|
||||
// non-space bytes, so we're done. Non-ASCII cases have
|
||||
// already been handled above.
|
||||
return s[:hi+1]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now look for the first ASCII non-space byte from the end
|
||||
stop := len(s)
|
||||
for ; stop > start; stop-- {
|
||||
c := s[stop-1]
|
||||
if c >= utf8.RuneSelf {
|
||||
return TrimFunc(s[start:stop], unicode.IsSpace)
|
||||
}
|
||||
if asciiSpace[c] == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// At this point s[start:stop] starts and ends with an ASCII
|
||||
// non-space bytes, so we're done. Non-ASCII cases have already
|
||||
// been handled above.
|
||||
if start == stop {
|
||||
// Special case to preserve previous TrimLeftFunc behavior,
|
||||
// returning nil instead of empty slice if all spaces.
|
||||
return nil
|
||||
}
|
||||
return s[start:stop]
|
||||
// Special case to preserve previous TrimLeftFunc behavior,
|
||||
// returning nil instead of empty slice if all spaces.
|
||||
return nil
|
||||
}
|
||||
|
||||
// Runes interprets s as a sequence of UTF-8-encoded code points.
|
||||
|
|
|
@@ -1091,37 +1091,32 @@ func trimRightUnicode(s, cutset string) string {
|
|||
// TrimSpace returns a slice of the string s, with all leading
|
||||
// and trailing white space removed, as defined by Unicode.
|
||||
func TrimSpace(s string) string {
|
||||
// Fast path for ASCII: look for the first ASCII non-space byte
|
||||
start := 0
|
||||
for ; start < len(s); start++ {
|
||||
c := s[start]
|
||||
// Fast path for ASCII: look for the first ASCII non-space byte.
|
||||
for lo, c := range []byte(s) {
|
||||
if c >= utf8.RuneSelf {
|
||||
// If we run into a non-ASCII byte, fall back to the
|
||||
// slower unicode-aware method on the remaining bytes
|
||||
return TrimFunc(s[start:], unicode.IsSpace)
|
||||
// slower unicode-aware method on the remaining bytes.
|
||||
return TrimFunc(s[lo:], unicode.IsSpace)
|
||||
}
|
||||
if asciiSpace[c] == 0 {
|
||||
break
|
||||
if asciiSpace[c] != 0 {
|
||||
continue
|
||||
}
|
||||
s = s[lo:]
|
||||
// Now look for the first ASCII non-space byte from the end.
|
||||
for hi := len(s) - 1; hi >= 0; hi-- {
|
||||
c := s[hi]
|
||||
if c >= utf8.RuneSelf {
|
||||
return TrimRightFunc(s[:hi+1], unicode.IsSpace)
|
||||
}
|
||||
if asciiSpace[c] == 0 {
|
||||
// At this point, s[:hi+1] starts and ends with ASCII
|
||||
// non-space bytes, so we're done. Non-ASCII cases have
|
||||
// already been handled above.
|
||||
return s[:hi+1]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now look for the first ASCII non-space byte from the end
|
||||
stop := len(s)
|
||||
for ; stop > start; stop-- {
|
||||
c := s[stop-1]
|
||||
if c >= utf8.RuneSelf {
|
||||
// start has been already trimmed above, should trim end only
|
||||
return TrimRightFunc(s[start:stop], unicode.IsSpace)
|
||||
}
|
||||
if asciiSpace[c] == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// At this point s[start:stop] starts and ends with an ASCII
|
||||
// non-space bytes, so we're done. Non-ASCII cases have already
|
||||
// been handled above.
|
||||
return s[start:stop]
|
||||
return ""
|
||||
}
|
||||
|
||||
// TrimPrefix returns s without the provided leading prefix string.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue