strings: optimize Count for amd64

Move optimized Count implementation from bytes to runtime. Use in
both bytes and strings packages.
Add CountByte benchmark to strings.

Strings benchmarks:
name                       old time/op    new time/op    delta
CountHard1-4                 226µs ± 1%      226µs ± 2%      ~     (p=0.247 n=10+10)
CountHard2-4                 316µs ± 1%      315µs ± 0%      ~     (p=0.133 n=9+10)
CountHard3-4                 919µs ± 1%      920µs ± 1%      ~     (p=0.968 n=10+9)
CountTorture-4              15.4µs ± 1%     15.7µs ± 1%    +2.47%  (p=0.000 n=10+9)
CountTortureOverlapping-4   9.60ms ± 0%     9.65ms ± 1%      ~     (p=0.247 n=10+10)
CountByte/10-4              26.3ns ± 1%     10.9ns ± 1%   -58.71%  (p=0.000 n=9+9)
CountByte/32-4              42.7ns ± 0%     14.2ns ± 0%   -66.64%  (p=0.000 n=10+10)
CountByte/4096-4            3.07µs ± 0%     0.31µs ± 2%   -89.99%  (p=0.000 n=9+10)
CountByte/4194304-4         3.48ms ± 1%     0.34ms ± 1%   -90.09%  (p=0.000 n=10+9)
CountByte/67108864-4        55.6ms ± 1%      7.0ms ± 0%   -87.49%  (p=0.000 n=9+8)

name                      old speed      new speed       delta
CountByte/10-4             380MB/s ± 1%    919MB/s ± 1%  +142.21%  (p=0.000 n=9+9)
CountByte/32-4             750MB/s ± 0%   2247MB/s ± 0%  +199.62%  (p=0.000 n=10+10)
CountByte/4096-4          1.33GB/s ± 0%  13.32GB/s ± 2%  +898.13%  (p=0.000 n=9+10)
CountByte/4194304-4       1.21GB/s ± 1%  12.17GB/s ± 1%  +908.87%  (p=0.000 n=10+9)
CountByte/67108864-4      1.21GB/s ± 1%   9.65GB/s ± 0%  +699.29%  (p=0.000 n=9+8)

Fixes #19411

Change-Id: I8d2d409f0fa6df6d03b60790aa86e540b4a4e3b0
Reviewed-on: https://go-review.googlesource.com/38693
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Josselin Costanzi 2017-03-27 13:22:59 +02:00 committed by Josh Bleecher Snyder
parent 03d1aa6024
commit d206af1e6c
9 changed files with 247 additions and 194 deletions

View file

@ -1457,6 +1457,24 @@ func BenchmarkCountTortureOverlapping(b *testing.B) {
}
}
func BenchmarkCountByte(b *testing.B) {
indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
benchStr := Repeat(benchmarkString,
(indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
benchFunc := func(b *testing.B, benchStr string) {
b.SetBytes(int64(len(benchStr)))
for i := 0; i < b.N; i++ {
Count(benchStr, "=")
}
}
for _, size := range indexSizes {
b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
benchFunc(b, benchStr[:size])
})
}
}
var makeFieldsInput = func() string {
x := make([]byte, 1<<20)
// Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.