bytes, strings: moves indexRabinKarp function to internal/bytealg

In order to facilitate optimization of IndexAny and LastIndexAny, this patch moves
three Rabin-Karp related functions indexRabinKarp, hashStr and hashStrRev in strings
package to initernal/bytealg. There are also three functions in the bytes package with
the same names and functions but different parameter types. To highlight this, this
patch also moves them to internal/bytealg and gives them slightly different names.

Related benchmark changes on amd64 and arm64:

name          old time/op    new time/op    delta
pkg:strings goos:linux goarch:amd64
Index-16        14.0ns ± 1%    14.1ns ± 2%    ~     (p=0.738 n=5+5)
LastIndex-16    15.5ns ± 1%    15.7ns ± 4%    ~     (p=0.897 n=5+5)
pkg:bytes goos:linux goarch:amd64
Index/10-16     26.5ns ± 1%    26.5ns ± 0%    ~     (p=0.873 n=5+5)
Index/32-16     26.2ns ± 0%    25.7ns ± 0%  -1.68%  (p=0.008 n=5+5)
Index/4K-16     5.12µs ± 4%    5.14µs ± 2%    ~     (p=0.841 n=5+5)
Index/4M-16     5.44ms ± 3%    5.34ms ± 2%    ~     (p=0.056 n=5+5)
Index/64M-16    85.8ms ± 3%    84.6ms ± 0%  -1.37%  (p=0.016 n=5+5)

name          old speed      new speed      delta
pkg:bytes goos:linux goarch:amd64
Index/10-16    377MB/s ± 1%   377MB/s ± 0%    ~     (p=1.000 n=5+5)
Index/32-16   1.22GB/s ± 1%  1.24GB/s ± 0%  +1.66%  (p=0.008 n=5+5)
Index/4K-16    800MB/s ± 4%   797MB/s ± 2%    ~     (p=0.841 n=5+5)
Index/4M-16    771MB/s ± 3%   786MB/s ± 2%    ~     (p=0.056 n=5+5)
Index/64M-16   783MB/s ± 3%   793MB/s ± 0%  +1.36%  (p=0.016 n=5+5)

name         old time/op   new time/op   delta
pkg:strings goos:linux goarch:arm64
Index-8       22.6ns ± 0%   22.5ns ± 0%    ~     (p=0.167 n=5+5)
LastIndex-8   17.5ns ± 0%   17.5ns ± 0%    ~     (all equal)
pkg:bytes goos:linux goarch:arm64
Index/10-8    25.0ns ± 0%   25.0ns ± 0%    ~     (all equal)
Index/32-8     160ns ± 0%    160ns ± 0%    ~     (all equal)
Index/4K-8    6.26µs ± 0%   6.26µs ± 0%    ~     (p=0.167 n=5+5)
Index/4M-8    6.30ms ± 0%   6.31ms ± 0%    ~     (p=1.000 n=5+5)
Index/64M-8    101ms ± 0%    101ms ± 0%    ~     (p=0.690 n=5+5)

name         old speed     new speed     delta
pkg:bytes goos:linux goarch:arm64
Index/10-8   399MB/s ± 0%  400MB/s ± 0%  +0.08%  (p=0.008 n=5+5)
Index/32-8   200MB/s ± 0%  200MB/s ± 0%    ~     (p=0.127 n=4+5)
Index/4K-8   654MB/s ± 0%  654MB/s ± 0%  +0.01%  (p=0.016 n=5+5)
Index/4M-8   665MB/s ± 0%  665MB/s ± 0%    ~     (p=0.833 n=5+5)
Index/64M-8  665MB/s ± 0%  665MB/s ± 0%    ~     (p=0.913 n=5+5)

Change-Id: Icce3bc162bb8613ac36dc963a46c51f8e82ab842
Reviewed-on: https://go-review.googlesource.com/c/go/+/208638
Run-TryBot: eric fang <eric.fang@arm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
erifan01 2019-11-21 14:38:25 +08:00 committed by Ian Lance Taylor
parent cec08794ef
commit 18a6fd44bb
4 changed files with 156 additions and 129 deletions

View file

@ -117,17 +117,17 @@ func LastIndex(s, sep []byte) int {
return -1
}
// Rabin-Karp search from the end of the string
hashss, pow := hashStrRev(sep)
hashss, pow := bytealg.HashStrRevBytes(sep)
last := len(s) - n
var h uint32
for i := len(s) - 1; i >= last; i-- {
h = h*primeRK + uint32(s[i])
h = h*bytealg.PrimeRK + uint32(s[i])
}
if h == hashss && Equal(s[last:], sep) {
return last
}
for i := last - 1; i >= 0; i-- {
h *= primeRK
h *= bytealg.PrimeRK
h += uint32(s[i])
h -= pow * uint32(s[i+n])
if h == hashss && Equal(s[i:i+n], sep) {
@ -1068,7 +1068,7 @@ func Index(s, sep []byte) int {
// we should cutover at even larger average skips,
// because Equal becomes that much more expensive.
// This code does not take that effect into account.
j := indexRabinKarp(s[i:], sep)
j := bytealg.IndexRabinKarpBytes(s[i:], sep)
if j < 0 {
return -1
}
@ -1077,63 +1077,3 @@ func Index(s, sep []byte) int {
}
return -1
}
func indexRabinKarp(s, sep []byte) int {
// Rabin-Karp search
hashsep, pow := hashStr(sep)
n := len(sep)
var h uint32
for i := 0; i < n; i++ {
h = h*primeRK + uint32(s[i])
}
if h == hashsep && Equal(s[:n], sep) {
return 0
}
for i := n; i < len(s); {
h *= primeRK
h += uint32(s[i])
h -= pow * uint32(s[i-n])
i++
if h == hashsep && Equal(s[i-n:i], sep) {
return i - n
}
}
return -1
}
// primeRK is the prime base used in Rabin-Karp algorithm.
const primeRK = 16777619
// hashStr returns the hash and the appropriate multiplicative
// factor for use in Rabin-Karp algorithm.
func hashStr(sep []byte) (uint32, uint32) {
hash := uint32(0)
for i := 0; i < len(sep); i++ {
hash = hash*primeRK + uint32(sep[i])
}
var pow, sq uint32 = 1, primeRK
for i := len(sep); i > 0; i >>= 1 {
if i&1 != 0 {
pow *= sq
}
sq *= sq
}
return hash, pow
}
// hashStrRev returns the hash of the reverse of sep and the
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
func hashStrRev(sep []byte) (uint32, uint32) {
hash := uint32(0)
for i := len(sep) - 1; i >= 0; i-- {
hash = hash*primeRK + uint32(sep[i])
}
var pow, sq uint32 = 1, primeRK
for i := len(sep); i > 0; i >>= 1 {
if i&1 != 0 {
pow *= sq
}
sq *= sq
}
return hash, pow
}