mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
bytes,strings: in generic Index, use mix of IndexByte and Rabin-Karp
Use IndexByte first, as it allows us to skip lots of bytes quickly.
If IndexByte is generating a lot of false positives, switch over to Rabin-Karp.

Experiments for ppc64le

bytes:
name                             old time/op  new time/op  delta
IndexPeriodic/IndexPeriodic2-2   1.12ms ± 0%  0.18ms ± 0%  -83.54%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic4-2   635µs ± 0%   184µs ± 0%   -71.06%  (p=0.000 n=9+9)
IndexPeriodic/IndexPeriodic8-2   289µs ± 0%   184µs ± 0%   -36.51%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic16-2  133µs ± 0%   183µs ± 0%   +37.68%  (p=0.000 n=10+9)
IndexPeriodic/IndexPeriodic32-2  68.3µs ± 0%  70.2µs ± 0%  +2.76%  (p=0.000 n=10+10)
IndexPeriodic/IndexPeriodic64-2  35.8µs ± 0%  36.6µs ± 0%  +2.17%  (p=0.000 n=8+10)

strings:
name                             old time/op  new time/op  delta
IndexPeriodic/IndexPeriodic2-2   184µs ± 0%   184µs ± 0%   +0.11%  (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic4-2   184µs ± 0%   184µs ± 0%   ~  (p=0.886 n=4+4)
IndexPeriodic/IndexPeriodic8-2   184µs ± 0%   184µs ± 0%   ~  (p=0.486 n=4+4)
IndexPeriodic/IndexPeriodic16-2  185µs ± 1%   184µs ± 0%   ~  (p=0.343 n=4+4)
IndexPeriodic/IndexPeriodic32-2  184µs ± 0%   69µs ± 0%    -62.37%  (p=0.029 n=4+4)
IndexPeriodic/IndexPeriodic64-2  184µs ± 0%   37µs ± 0%    -80.17%  (p=0.029 n=4+4)

Fixes #22578

Change-Id: If2a4d8554cb96bfd699b58149d13ac294615f8b8
Reviewed-on: https://go-review.googlesource.com/76070
Reviewed-by: Alberto Donizetti <alb.donizetti@gmail.com>
This commit is contained in:
parent
0ffe90b501
commit
a025277505
10 changed files with 154 additions and 140 deletions
|
|
@ -815,3 +815,46 @@ func EqualFold(s, t []byte) bool {
|
|||
// One string is empty. Are both?
|
||||
return len(s) == len(t)
|
||||
}
|
||||
|
||||
func indexRabinKarp(s, sep []byte) int {
|
||||
// Rabin-Karp search
|
||||
hashsep, pow := hashStr(sep)
|
||||
n := len(sep)
|
||||
var h uint32
|
||||
for i := 0; i < n; i++ {
|
||||
h = h*primeRK + uint32(s[i])
|
||||
}
|
||||
if h == hashsep && Equal(s[:n], sep) {
|
||||
return 0
|
||||
}
|
||||
for i := n; i < len(s); {
|
||||
h *= primeRK
|
||||
h += uint32(s[i])
|
||||
h -= pow * uint32(s[i-n])
|
||||
i++
|
||||
if h == hashsep && Equal(s[i-n:i], sep) {
|
||||
return i - n
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// primeRK is the prime base used in the Rabin-Karp algorithm: it is the
// multiplier applied to the running hash before each new byte is added.
// The hashes are held in uint32 values, so the arithmetic wraps on overflow.
const primeRK = 16777619
|
||||
|
||||
// hashStr returns the hash and the appropriate multiplicative
|
||||
// factor for use in Rabin-Karp algorithm.
|
||||
func hashStr(sep []byte) (uint32, uint32) {
|
||||
hash := uint32(0)
|
||||
for i := 0; i < len(sep); i++ {
|
||||
hash = hash*primeRK + uint32(sep[i])
|
||||
}
|
||||
var pow, sq uint32 = 1, primeRK
|
||||
for i := len(sep); i > 0; i >>= 1 {
|
||||
if i&1 != 0 {
|
||||
pow *= sq
|
||||
}
|
||||
sq *= sq
|
||||
}
|
||||
return hash, pow
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue