mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
strings: faster Count, Index
Slightly better benchmarks for when string and separator are equivalent and also less branching in inner loops. benchmark old ns/op new ns/op delta BenchmarkGenericNoMatch 3430 3442 +0.35% BenchmarkGenericMatch1 23590 22855 -3.12% BenchmarkGenericMatch2 108031 105025 -2.78% BenchmarkSingleMaxSkipping 2969 2704 -8.93% BenchmarkSingleLongSuffixFail 2826 2572 -8.99% BenchmarkSingleMatch 205268 197832 -3.62% BenchmarkByteByteNoMatch 987 921 -6.69% BenchmarkByteByteMatch 2014 1749 -13.16% BenchmarkByteStringMatch 3083 3050 -1.07% BenchmarkHTMLEscapeNew 922 915 -0.76% BenchmarkHTMLEscapeOld 1654 1570 -5.08% BenchmarkByteByteReplaces 11897 11556 -2.87% BenchmarkByteByteMap 4485 4255 -5.13% BenchmarkIndexRune 174 121 -30.46% BenchmarkIndexRuneFastPath 41 41 -0.24% BenchmarkIndex 45 44 -0.22% BenchmarkMapNoChanges 433 431 -0.46% BenchmarkIndexHard1 4015336 3316490 -17.40% BenchmarkIndexHard2 3976254 3395627 -14.60% BenchmarkIndexHard3 3973158 3378329 -14.97% BenchmarkCountHard1 4403549 3448512 -21.69% BenchmarkCountHard2 4387437 3413059 -22.21% BenchmarkCountHard3 4403891 3382661 -23.19% BenchmarkIndexTorture 28354 25864 -8.78% BenchmarkCountTorture 29625 27463 -7.30% BenchmarkFields 38752040 39169840 +1.08% BenchmarkFieldsFunc 38797765 38888060 +0.23% benchmark old MB/s new MB/s speedup BenchmarkSingleMaxSkipping 3367.07 3697.62 1.10x BenchmarkSingleLongSuffixFail 354.51 389.47 1.10x BenchmarkSingleMatch 73.07 75.82 1.04x BenchmarkFields 27.06 26.77 0.99x BenchmarkFieldsFunc 27.03 26.96 1.00x R=dave, fullung, remyoudompheng, rsc CC=golang-dev https://golang.org/cl/7350045
This commit is contained in:
parent
9704c80b93
commit
937f91e1da
2 changed files with 47 additions and 28 deletions
|
|
@ -59,21 +59,26 @@ func hashstr(sep string) (uint32, uint32) {
|
||||||
|
|
||||||
// Count counts the number of non-overlapping instances of sep in s.
|
// Count counts the number of non-overlapping instances of sep in s.
|
||||||
func Count(s, sep string) int {
|
func Count(s, sep string) int {
|
||||||
if sep == "" {
|
|
||||||
return utf8.RuneCountInString(s) + 1
|
|
||||||
}
|
|
||||||
c := sep[0]
|
|
||||||
n := 0
|
n := 0
|
||||||
if len(sep) == 1 {
|
// special cases
|
||||||
|
switch {
|
||||||
|
case len(sep) == 0:
|
||||||
|
return utf8.RuneCountInString(s) + 1
|
||||||
|
case len(sep) == 1:
|
||||||
// special case worth making fast
|
// special case worth making fast
|
||||||
|
c := sep[0]
|
||||||
for i := 0; i < len(s); i++ {
|
for i := 0; i < len(s); i++ {
|
||||||
if s[i] == c {
|
if s[i] == c {
|
||||||
n++
|
n++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return n
|
return n
|
||||||
}
|
case len(sep) > len(s):
|
||||||
if len(sep) > len(s) {
|
return 0
|
||||||
|
case len(sep) == len(s):
|
||||||
|
if sep == s {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
hashsep, pow := hashstr(sep)
|
hashsep, pow := hashstr(sep)
|
||||||
|
|
@ -82,17 +87,19 @@ func Count(s, sep string) int {
|
||||||
h = h*primeRK + uint32(s[i])
|
h = h*primeRK + uint32(s[i])
|
||||||
}
|
}
|
||||||
lastmatch := 0
|
lastmatch := 0
|
||||||
for i := len(sep); ; i++ {
|
if h == hashsep && s[:len(sep)] == sep {
|
||||||
// Invariant: h = hash(s[i-l : i])
|
n++
|
||||||
|
lastmatch = len(sep)
|
||||||
|
}
|
||||||
|
for i := len(sep); i < len(s); {
|
||||||
|
h *= primeRK
|
||||||
|
h += uint32(s[i])
|
||||||
|
h -= pow * uint32(s[i-len(sep)])
|
||||||
|
i++
|
||||||
if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
|
if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
|
||||||
n++
|
n++
|
||||||
lastmatch = i
|
lastmatch = i
|
||||||
}
|
}
|
||||||
if i >= len(s) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
h = h*primeRK + uint32(s[i])
|
|
||||||
h -= pow * uint32(s[i-len(sep)])
|
|
||||||
}
|
}
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
|
|
@ -115,11 +122,11 @@ func ContainsRune(s string, r rune) bool {
|
||||||
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
|
||||||
func Index(s, sep string) int {
|
func Index(s, sep string) int {
|
||||||
n := len(sep)
|
n := len(sep)
|
||||||
if n == 0 {
|
switch {
|
||||||
|
case n == 0:
|
||||||
return 0
|
return 0
|
||||||
}
|
case n == 1:
|
||||||
c := sep[0]
|
c := sep[0]
|
||||||
if n == 1 {
|
|
||||||
// special case worth making fast
|
// special case worth making fast
|
||||||
for i := 0; i < len(s); i++ {
|
for i := 0; i < len(s); i++ {
|
||||||
if s[i] == c {
|
if s[i] == c {
|
||||||
|
|
@ -127,9 +134,12 @@ func Index(s, sep string) int {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
case n == len(s):
|
||||||
// n > 1
|
if sep == s {
|
||||||
if n > len(s) {
|
return 0
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
case n > len(s):
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
// Hash sep.
|
// Hash sep.
|
||||||
|
|
@ -138,16 +148,17 @@ func Index(s, sep string) int {
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
h = h*primeRK + uint32(s[i])
|
h = h*primeRK + uint32(s[i])
|
||||||
}
|
}
|
||||||
for i := n; ; i++ {
|
if h == hashsep && s[:n] == sep {
|
||||||
// Invariant: h = hash(s[i-n : i])
|
return 0
|
||||||
|
}
|
||||||
|
for i := n; i < len(s); {
|
||||||
|
h *= primeRK
|
||||||
|
h += uint32(s[i])
|
||||||
|
h -= pow * uint32(s[i-n])
|
||||||
|
i++
|
||||||
if h == hashsep && s[i-n:i] == sep {
|
if h == hashsep && s[i-n:i] == sep {
|
||||||
return i - n
|
return i - n
|
||||||
}
|
}
|
||||||
if i >= len(s) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
h = h*primeRK + uint32(s[i])
|
|
||||||
h -= pow * uint32(s[i-n])
|
|
||||||
}
|
}
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1052,6 +1052,14 @@ func BenchmarkCountTorture(b *testing.B) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkCountTortureOverlapping(b *testing.B) {
|
||||||
|
A := Repeat("ABC", 1<<20)
|
||||||
|
B := Repeat("ABC", 1<<10)
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
Count(A, B)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var makeFieldsInput = func() string {
|
var makeFieldsInput = func() string {
|
||||||
x := make([]byte, 1<<20)
|
x := make([]byte, 1<<20)
|
||||||
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
|
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue