mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
bytes: add optimized countByte for amd64
Use SSE/AVX2 when counting a single byte. Inspired from runtime indexbyte implementation. Benchmark against previous implementation, where 1 byte in every 8 is the one we are looking for: * On a machine without AVX2 name old time/op new time/op delta CountSingle/10-4 61.8ns ±10% 15.6ns ±11% -74.83% (p=0.000 n=10+10) CountSingle/32-4 100ns ± 4% 17ns ±10% -82.54% (p=0.000 n=10+9) CountSingle/4K-4 9.66µs ± 3% 0.37µs ± 6% -96.21% (p=0.000 n=10+10) CountSingle/4M-4 11.0ms ± 6% 0.4ms ± 4% -96.04% (p=0.000 n=10+10) CountSingle/64M-4 194ms ± 8% 8ms ± 2% -95.64% (p=0.000 n=10+10) name old speed new speed delta CountSingle/10-4 162MB/s ±10% 645MB/s ±10% +297.00% (p=0.000 n=10+10) CountSingle/32-4 321MB/s ± 5% 1844MB/s ± 9% +474.79% (p=0.000 n=10+9) CountSingle/4K-4 424MB/s ± 3% 11169MB/s ± 6% +2533.10% (p=0.000 n=10+10) CountSingle/4M-4 381MB/s ± 7% 9609MB/s ± 4% +2421.88% (p=0.000 n=10+10) CountSingle/64M-4 346MB/s ± 7% 7924MB/s ± 2% +2188.78% (p=0.000 n=10+10) * On a machine with AVX2 name old time/op new time/op delta CountSingle/10-8 37.1ns ± 3% 8.2ns ± 1% -77.80% (p=0.000 n=10+10) CountSingle/32-8 66.1ns ± 3% 9.8ns ± 2% -85.23% (p=0.000 n=10+10) CountSingle/4K-8 7.36µs ± 3% 0.11µs ± 1% -98.54% (p=0.000 n=10+10) CountSingle/4M-8 7.46ms ± 2% 0.15ms ± 2% -97.95% (p=0.000 n=10+9) CountSingle/64M-8 124ms ± 2% 6ms ± 4% -95.09% (p=0.000 n=10+10) name old speed new speed delta CountSingle/10-8 269MB/s ± 3% 1213MB/s ± 1% +350.32% (p=0.000 n=10+10) CountSingle/32-8 484MB/s ± 4% 3277MB/s ± 2% +576.66% (p=0.000 n=10+10) CountSingle/4K-8 556MB/s ± 3% 37933MB/s ± 1% +6718.36% (p=0.000 n=10+10) CountSingle/4M-8 562MB/s ± 2% 27444MB/s ± 3% +4783.43% (p=0.000 n=10+9) CountSingle/64M-8 543MB/s ± 2% 11054MB/s ± 3% +1935.81% (p=0.000 n=10+10) Fixes #19411 Change-Id: Ieaf20b1fabccabe767c55c66e242e86f3617f883 Reviewed-on: https://go-review.googlesource.com/38258 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
0ebaca6ba2
commit
01cd22c687
10 changed files with 317 additions and 4 deletions
|
|
@ -396,6 +396,79 @@ func TestIndexRune(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// test count of a single byte across page offsets
|
||||
func TestCountByte(t *testing.T) {
|
||||
b := make([]byte, 5015) // bigger than a page
|
||||
windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128}
|
||||
testCountWindow := func(i, window int) {
|
||||
for j := 0; j < window; j++ {
|
||||
b[i+j] = byte(100)
|
||||
p := Count(b[i:i+window], []byte{100})
|
||||
if p != j+1 {
|
||||
t.Errorf("TestCountByte.Count(%q, 100) = %d", b[i:i+window], p)
|
||||
}
|
||||
pGeneric := CountGeneric(b[i:i+window], []byte{100})
|
||||
if pGeneric != j+1 {
|
||||
t.Errorf("TestCountByte.CountGeneric(%q, 100) = %d", b[i:i+window], p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
maxWnd := windows[len(windows)-1]
|
||||
|
||||
for i := 0; i <= 2*maxWnd; i++ {
|
||||
for _, window := range windows {
|
||||
if window > len(b[i:]) {
|
||||
window = len(b[i:])
|
||||
}
|
||||
testCountWindow(i, window)
|
||||
for j := 0; j < window; j++ {
|
||||
b[i+j] = byte(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
for i := 4096 - (maxWnd + 1); i < len(b); i++ {
|
||||
for _, window := range windows {
|
||||
if window > len(b[i:]) {
|
||||
window = len(b[i:])
|
||||
}
|
||||
testCountWindow(i, window)
|
||||
for j := 0; j < window; j++ {
|
||||
b[i+j] = byte(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we don't count bytes outside our window
|
||||
func TestCountByteNoMatch(t *testing.T) {
|
||||
b := make([]byte, 5015)
|
||||
windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128}
|
||||
for i := 0; i <= len(b); i++ {
|
||||
for _, window := range windows {
|
||||
if window > len(b[i:]) {
|
||||
window = len(b[i:])
|
||||
}
|
||||
// Fill the window with non-match
|
||||
for j := 0; j < window; j++ {
|
||||
b[i+j] = byte(100)
|
||||
}
|
||||
// Try to find something that doesn't exist
|
||||
p := Count(b[i:i+window], []byte{0})
|
||||
if p != 0 {
|
||||
t.Errorf("TestCountByteNoMatch(%q, 0) = %d", b[i:i+window], p)
|
||||
}
|
||||
pGeneric := CountGeneric(b[i:i+window], []byte{0})
|
||||
if pGeneric != 0 {
|
||||
t.Errorf("TestCountByteNoMatch.CountGeneric(%q, 100) = %d", b[i:i+window], p)
|
||||
}
|
||||
for j := 0; j < window; j++ {
|
||||
b[i+j] = byte(0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var bmbuf []byte
|
||||
|
||||
func valName(x int) string {
|
||||
|
|
@ -589,6 +662,26 @@ func BenchmarkCountEasy(b *testing.B) {
|
|||
})
|
||||
}
|
||||
|
||||
func BenchmarkCountSingle(b *testing.B) {
|
||||
benchBytes(b, indexSizes, func(b *testing.B, n int) {
|
||||
buf := bmbuf[0:n]
|
||||
step := 8
|
||||
for i := 0; i < len(buf); i += step {
|
||||
buf[i] = 1
|
||||
}
|
||||
expect := (len(buf) + (step - 1)) / step
|
||||
for i := 0; i < b.N; i++ {
|
||||
j := Count(buf, []byte{1})
|
||||
if j != expect {
|
||||
b.Fatal("bad count", j, expect)
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(buf); i++ {
|
||||
buf[i] = 0
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
type ExplodeTest struct {
|
||||
s string
|
||||
n int
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue