mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
strings,bytes: avoid allocations in Trim/TrimLeft/TrimRight
There is evidence that the vast majority of uses for Trim* involve cutsets with a single ASCII character, and the vast majority of remaining uses involve cutsets with a small (<4) ASCII characters. For this reason it makes sense to provide better fast paths for these common cases. Furthermore the current implementation needlessly allocates for unclear benefits. This CL also replaces all paths to avoid allocations and, as a side effect, it speeds up also the slow path. strings: name old time/op new time/op delta Trim 1.71µs ± 1% 0.70µs ± 0% -58.93% (p=0.008 n=5+5) TrimASCII/1:1 6.43ns ± 0% 6.34ns ± 0% -1.41% (p=0.008 n=5+5) TrimASCII/1:2 97.3ns ± 0% 18.2ns ± 1% -81.34% (p=0.008 n=5+5) TrimASCII/1:4 101ns ± 0% 21ns ± 0% -78.77% (p=0.008 n=5+5) TrimASCII/1:8 109ns ± 0% 29ns ± 0% -73.60% (p=0.008 n=5+5) TrimASCII/1:16 124ns ± 0% 43ns ± 0% -65.16% (p=0.008 n=5+5) TrimASCII/16:1 19.8ns ± 0% 18.6ns ± 0% -5.90% (p=0.008 n=5+5) TrimASCII/16:2 167ns ± 0% 33ns ± 0% -80.21% (p=0.008 n=5+5) TrimASCII/16:4 169ns ± 0% 35ns ± 0% -79.01% (p=0.008 n=5+5) TrimASCII/16:8 177ns ± 0% 43ns ± 0% -75.88% (p=0.008 n=5+5) TrimASCII/16:16 193ns ± 2% 57ns ± 1% -70.30% (p=0.008 n=5+5) TrimASCII/256:1 232ns ± 0% 232ns ± 0% ~ (p=1.000 n=5+5) TrimASCII/256:2 1.28µs ± 1% 0.26µs ± 0% -79.46% (p=0.008 n=5+5) TrimASCII/256:4 1.27µs ± 0% 0.27µs ± 0% -78.95% (p=0.008 n=5+5) TrimASCII/256:8 1.28µs ± 0% 0.28µs ± 1% -78.28% (p=0.008 n=5+5) TrimASCII/256:16 1.30µs ± 1% 0.29µs ± 0% -77.49% (p=0.008 n=5+5) TrimASCII/4096:1 3.47µs ± 0% 3.47µs ± 0% -0.14% (p=0.008 n=5+5) TrimASCII/4096:2 18.2µs ± 0% 3.9µs ± 0% -78.53% (p=0.008 n=5+5) TrimASCII/4096:4 18.2µs ± 0% 3.9µs ± 0% -78.55% (p=0.008 n=5+5) TrimASCII/4096:8 18.2µs ± 0% 3.9µs ± 0% -78.49% (p=0.008 n=5+5) TrimASCII/4096:16 18.3µs ± 0% 3.9µs ± 0% -78.44% (p=0.008 n=5+5) TrimByte 10.6ns ± 1% 10.1ns ± 0% -5.01% (p=0.008 n=5+5) TrimSpace/NoTrim 5.90ns ± 0% 5.89ns ± 0% ~ (p=0.135 n=5+5) TrimSpace/ASCII 10.6ns ± 0% 9.9ns ± 0% -6.21% (p=0.008 n=5+5) TrimSpace/SomeNonASCII 127ns ± 0% 126ns ± 0% -0.96% (p=0.008 n=5+5) TrimSpace/JustNonASCII 178ns ± 0% 178ns ± 0% ~ (p=0.825 n=5+4) name old alloc/op new alloc/op delta Trim 456B ± 0% 0B -100.00% (p=0.008 n=5+5) TrimASCII/1:1 0.00B 0.00B ~ (all equal) TrimASCII/1:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/1:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/1:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/1:16 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:1 0.00B 0.00B ~ (all equal) TrimASCII/16:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:16 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:1 0.00B 0.00B ~ (all equal) TrimASCII/256:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:16 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:1 0.00B 0.00B ~ (all equal) TrimASCII/4096:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:16 48.0B ± 0% 0.0B ~ (p=0.079 n=4+5) TrimByte 0.00B 0.00B ~ (all equal) TrimSpace/NoTrim 0.00B 0.00B ~ (all equal) TrimSpace/ASCII 0.00B 0.00B ~ (all equal) TrimSpace/SomeNonASCII 0.00B 0.00B ~ (all equal) TrimSpace/JustNonASCII 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta Trim 18.0 ± 0% 0.0 -100.00% (p=0.008 n=5+5) TrimASCII/1:1 0.00 0.00 ~ (all equal) TrimASCII/1:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/1:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/1:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/1:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:1 0.00 0.00 ~ (all equal) TrimASCII/16:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:1 0.00 0.00 ~ (all equal) TrimASCII/256:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:1 0.00 0.00 ~ (all equal) TrimASCII/4096:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimByte 0.00 0.00 ~ (all equal) TrimSpace/NoTrim 0.00 0.00 ~ (all equal) TrimSpace/ASCII 0.00 0.00 ~ (all equal) TrimSpace/SomeNonASCII 0.00 0.00 ~ (all equal) TrimSpace/JustNonASCII 0.00 0.00 ~ (all equal) bytes: name old time/op new time/op delta TrimSpace/NoTrim 5.89ns ± 0% 5.91ns ± 0% ~ (p=0.095 n=5+4) TrimSpace/ASCII 10.3ns ± 1% 10.2ns ± 0% ~ (p=0.095 n=5+5) TrimSpace/SomeNonASCII 120ns ± 1% 121ns ± 0% +1.13% (p=0.008 n=5+5) TrimSpace/JustNonASCII 194ns ± 1% 195ns ± 0% ~ (p=0.143 n=5+5) TrimASCII/1:1 6.28ns ± 0% 5.95ns ± 0% -5.26% (p=0.008 n=5+5) TrimASCII/1:2 95.8ns ± 1% 18.6ns ± 0% -80.63% (p=0.008 n=5+5) TrimASCII/1:4 98.8ns ± 0% 22.1ns ± 0% -77.62% (p=0.008 n=5+5) TrimASCII/1:8 107ns ± 0% 29ns ± 0% -72.72% (p=0.008 n=5+5) TrimASCII/1:16 123ns ± 0% 44ns ± 1% -64.30% (p=0.008 n=5+5) TrimASCII/16:1 13.2ns ± 0% 12.8ns ± 1% -2.75% (p=0.008 n=5+5) TrimASCII/16:2 169ns ± 0% 33ns ± 0% -80.33% (p=0.008 n=5+5) TrimASCII/16:4 173ns ± 0% 36ns ± 0% -79.31% (p=0.008 n=5+5) TrimASCII/16:8 180ns ± 0% 43ns ± 0% -76.02% (p=0.008 n=5+5) TrimASCII/16:16 197ns ± 2% 58ns ± 0% -70.73% (p=0.008 n=5+5) TrimASCII/256:1 137ns ± 1% 136ns ± 0% -0.82% (p=0.016 n=5+5) TrimASCII/256:2 1.40µs ± 0% 0.26µs ± 0% -81.02% (p=0.008 n=5+5) TrimASCII/256:4 1.40µs ± 0% 0.27µs ± 0% -80.83% (p=0.008 n=5+5) TrimASCII/256:8 1.41µs ± 0% 0.28µs ± 0% -80.36% (p=0.008 n=5+5) TrimASCII/256:16 1.42µs ± 0% 0.29µs ± 0% -79.48% (p=0.008 n=5+5) TrimASCII/4096:1 1.75µs ± 0% 1.75µs ± 0% ~ (p=0.595 n=5+5) TrimASCII/4096:2 20.9µs ± 0% 3.9µs ± 0% -81.29% (p=0.008 n=5+5) TrimASCII/4096:4 20.9µs ± 0% 3.9µs ± 0% -81.27% (p=0.008 n=5+5) TrimASCII/4096:8 20.9µs ± 0% 3.9µs ± 0% -81.22% (p=0.008 n=5+5) TrimASCII/4096:16 20.9µs ± 0% 3.9µs ± 0% -81.21% (p=0.008 n=5+5) TrimByte 9.21ns ± 0% 9.30ns ± 0% +0.91% (p=0.008 n=5+5) name old alloc/op new alloc/op delta TrimSpace/NoTrim 0.00B 0.00B ~ (all equal) TrimSpace/ASCII 0.00B 0.00B ~ (all equal) TrimSpace/SomeNonASCII 0.00B 0.00B ~ (all equal) TrimSpace/JustNonASCII 0.00B 0.00B ~ (all equal) TrimASCII/1:1 0.00B 0.00B ~ (all equal) TrimASCII/1:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/1:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/1:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/1:16 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:1 0.00B 0.00B ~ (all equal) TrimASCII/16:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/16:16 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:1 0.00B 0.00B ~ (all equal) TrimASCII/256:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/256:16 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:1 0.00B 0.00B ~ (all equal) TrimASCII/4096:2 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:4 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:8 48.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimASCII/4096:16 49.0B ± 0% 0.0B -100.00% (p=0.008 n=5+5) TrimByte 0.00B 0.00B ~ (all equal) name old allocs/op new allocs/op delta TrimSpace/NoTrim 0.00 0.00 ~ (all equal) TrimSpace/ASCII 0.00 0.00 ~ (all equal) TrimSpace/SomeNonASCII 0.00 0.00 ~ (all equal) TrimSpace/JustNonASCII 0.00 0.00 ~ (all equal) TrimASCII/1:1 0.00 0.00 ~ (all equal) TrimASCII/1:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/1:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/1:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/1:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:1 0.00 0.00 ~ (all equal) TrimASCII/16:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/16:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:1 0.00 0.00 ~ (all equal) TrimASCII/256:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/256:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:1 0.00 0.00 ~ (all equal) TrimASCII/4096:2 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:4 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:8 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimASCII/4096:16 2.00 ± 0% 0.00 -100.00% (p=0.008 n=5+5) TrimByte 0.00 0.00 ~ (all equal) Fixes #46446 Change-Id: I9537c86f888af6285027f67bda4a97aeedb41d4a Reviewed-on: https://go-review.googlesource.com/c/go/+/332771 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Joe Tsai <joetsai@digital-static.net> Trust: Joe Tsai <joetsai@digital-static.net> Trust: Than McIntosh <thanm@google.com>
This commit is contained in:
parent
2e107b43c7
commit
4002616f9a
2 changed files with 145 additions and 31 deletions
|
|
@ -797,6 +797,8 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
|
|||
// most-significant bit of the highest word, map to the full range of all
|
||||
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
|
||||
// ensuring that any non-ASCII character will be reported as not in the set.
|
||||
// This allocates a total of 32 bytes even though the upper half
|
||||
// is unused to avoid bounds checks in asciiSet.contains.
|
||||
type asciiSet [8]uint32
|
||||
|
||||
// makeASCIISet creates a set of ASCII characters and reports whether all
|
||||
|
|
@ -807,23 +809,14 @@ func makeASCIISet(chars string) (as asciiSet, ok bool) {
|
|||
if c >= utf8.RuneSelf {
|
||||
return as, false
|
||||
}
|
||||
as[c>>5] |= 1 << uint(c&31)
|
||||
as[c/32] |= 1 << (c % 32)
|
||||
}
|
||||
return as, true
|
||||
}
|
||||
|
||||
// contains reports whether c is inside the set.
|
||||
func (as *asciiSet) contains(c byte) bool {
|
||||
return (as[c>>5] & (1 << uint(c&31))) != 0
|
||||
}
|
||||
|
||||
func makeCutsetFunc(cutset string) func(rune) bool {
|
||||
if as, isASCII := makeASCIISet(cutset); isASCII {
|
||||
return func(r rune) bool {
|
||||
return r < utf8.RuneSelf && as.contains(byte(r))
|
||||
}
|
||||
}
|
||||
return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
|
||||
return (as[c/32] & (1 << (c % 32))) != 0
|
||||
}
|
||||
|
||||
// Trim returns a slice of the string s with all leading and
|
||||
|
|
@ -835,7 +828,10 @@ func Trim(s, cutset string) string {
|
|||
if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
|
||||
return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0])
|
||||
}
|
||||
return TrimFunc(s, makeCutsetFunc(cutset))
|
||||
if as, ok := makeASCIISet(cutset); ok {
|
||||
return trimLeftASCII(trimRightASCII(s, &as), &as)
|
||||
}
|
||||
return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
|
||||
}
|
||||
|
||||
// TrimLeft returns a slice of the string s with all leading
|
||||
|
|
@ -849,7 +845,10 @@ func TrimLeft(s, cutset string) string {
|
|||
if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
|
||||
return trimLeftByte(s, cutset[0])
|
||||
}
|
||||
return TrimLeftFunc(s, makeCutsetFunc(cutset))
|
||||
if as, ok := makeASCIISet(cutset); ok {
|
||||
return trimLeftASCII(s, &as)
|
||||
}
|
||||
return trimLeftUnicode(s, cutset)
|
||||
}
|
||||
|
||||
func trimLeftByte(s string, c byte) string {
|
||||
|
|
@ -859,6 +858,30 @@ func trimLeftByte(s string, c byte) string {
|
|||
return s
|
||||
}
|
||||
|
||||
func trimLeftASCII(s string, as *asciiSet) string {
|
||||
for len(s) > 0 {
|
||||
if !as.contains(s[0]) {
|
||||
break
|
||||
}
|
||||
s = s[1:]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func trimLeftUnicode(s, cutset string) string {
|
||||
for len(s) > 0 {
|
||||
r, n := rune(s[0]), 1
|
||||
if r >= utf8.RuneSelf {
|
||||
r, n = utf8.DecodeRuneInString(s)
|
||||
}
|
||||
if !ContainsRune(cutset, r) {
|
||||
break
|
||||
}
|
||||
s = s[n:]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// TrimRight returns a slice of the string s, with all trailing
|
||||
// Unicode code points contained in cutset removed.
|
||||
//
|
||||
|
|
@ -870,7 +893,10 @@ func TrimRight(s, cutset string) string {
|
|||
if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
|
||||
return trimRightByte(s, cutset[0])
|
||||
}
|
||||
return TrimRightFunc(s, makeCutsetFunc(cutset))
|
||||
if as, ok := makeASCIISet(cutset); ok {
|
||||
return trimRightASCII(s, &as)
|
||||
}
|
||||
return trimRightUnicode(s, cutset)
|
||||
}
|
||||
|
||||
func trimRightByte(s string, c byte) string {
|
||||
|
|
@ -880,6 +906,30 @@ func trimRightByte(s string, c byte) string {
|
|||
return s
|
||||
}
|
||||
|
||||
func trimRightASCII(s string, as *asciiSet) string {
|
||||
for len(s) > 0 {
|
||||
if !as.contains(s[len(s)-1]) {
|
||||
break
|
||||
}
|
||||
s = s[:len(s)-1]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func trimRightUnicode(s, cutset string) string {
|
||||
for len(s) > 0 {
|
||||
r, n := rune(s[len(s)-1]), 1
|
||||
if r >= utf8.RuneSelf {
|
||||
r, n = utf8.DecodeLastRuneInString(s)
|
||||
}
|
||||
if !ContainsRune(cutset, r) {
|
||||
break
|
||||
}
|
||||
s = s[:len(s)-n]
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// TrimSpace returns a slice of the string s, with all leading
|
||||
// and trailing white space removed, as defined by Unicode.
|
||||
func TrimSpace(s string) string {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue