2010-09-21 23:12:57 -07:00
|
|
|
// Copyright 2010 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package suffixarray
|
|
|
|
|
|
|
|
|
|
import (
|
2011-01-11 21:46:50 -08:00
|
|
|
"bytes"
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
"fmt"
|
2020-07-07 13:49:21 -04:00
|
|
|
"io/fs"
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
"io/ioutil"
|
2011-11-08 15:40:58 -08:00
|
|
|
"math/rand"
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
"path/filepath"
|
2011-09-26 18:33:13 -04:00
|
|
|
"regexp"
|
2010-09-21 23:12:57 -07:00
|
|
|
"sort"
|
|
|
|
|
"strings"
|
|
|
|
|
"testing"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type testCase struct {
|
2010-12-17 14:00:46 -08:00
|
|
|
name string // name of test case
|
|
|
|
|
source string // source to index
|
|
|
|
|
patterns []string // patterns to lookup
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var testCases = []testCase{
|
2010-10-22 10:06:33 -07:00
|
|
|
{
|
2010-09-21 23:12:57 -07:00
|
|
|
"empty string",
|
|
|
|
|
"",
|
|
|
|
|
[]string{
|
|
|
|
|
"",
|
|
|
|
|
"foo",
|
2010-12-17 14:00:46 -08:00
|
|
|
"(foo)",
|
|
|
|
|
".*",
|
|
|
|
|
"a*",
|
2010-09-21 23:12:57 -07:00
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
|
2010-10-22 10:06:33 -07:00
|
|
|
{
|
2010-09-21 23:12:57 -07:00
|
|
|
"all a's",
|
|
|
|
|
"aaaaaaaaaa", // 10 a's
|
|
|
|
|
[]string{
|
|
|
|
|
"",
|
|
|
|
|
"a",
|
|
|
|
|
"aa",
|
|
|
|
|
"aaa",
|
|
|
|
|
"aaaa",
|
|
|
|
|
"aaaaa",
|
|
|
|
|
"aaaaaa",
|
|
|
|
|
"aaaaaaa",
|
|
|
|
|
"aaaaaaaa",
|
|
|
|
|
"aaaaaaaaa",
|
|
|
|
|
"aaaaaaaaaa",
|
|
|
|
|
"aaaaaaaaaaa", // 11 a's
|
2010-12-17 14:00:46 -08:00
|
|
|
".",
|
|
|
|
|
".*",
|
|
|
|
|
"a+",
|
|
|
|
|
"aa+",
|
|
|
|
|
"aaaa[b]?",
|
|
|
|
|
"aaa*",
|
2010-09-21 23:12:57 -07:00
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
|
2010-10-22 10:06:33 -07:00
|
|
|
{
|
2010-09-21 23:12:57 -07:00
|
|
|
"abc",
|
|
|
|
|
"abc",
|
|
|
|
|
[]string{
|
|
|
|
|
"a",
|
|
|
|
|
"b",
|
|
|
|
|
"c",
|
|
|
|
|
"ab",
|
|
|
|
|
"bc",
|
|
|
|
|
"abc",
|
2010-12-17 14:00:46 -08:00
|
|
|
"a.c",
|
|
|
|
|
"a(b|c)",
|
|
|
|
|
"abc?",
|
2010-09-21 23:12:57 -07:00
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
|
2010-10-22 10:06:33 -07:00
|
|
|
{
|
2010-09-21 23:12:57 -07:00
|
|
|
"barbara*3",
|
|
|
|
|
"barbarabarbarabarbara",
|
|
|
|
|
[]string{
|
|
|
|
|
"a",
|
|
|
|
|
"bar",
|
|
|
|
|
"rab",
|
|
|
|
|
"arab",
|
|
|
|
|
"barbar",
|
2010-12-17 14:00:46 -08:00
|
|
|
"bara?bar",
|
2010-09-21 23:12:57 -07:00
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
|
2010-10-22 10:06:33 -07:00
|
|
|
{
|
2010-09-21 23:12:57 -07:00
|
|
|
"typing drill",
|
|
|
|
|
"Now is the time for all good men to come to the aid of their country.",
|
|
|
|
|
[]string{
|
|
|
|
|
"Now",
|
|
|
|
|
"the time",
|
|
|
|
|
"to come the aid",
|
|
|
|
|
"is the time for all good men to come to the aid of their",
|
2010-12-17 14:00:46 -08:00
|
|
|
"to (come|the)?",
|
2010-09-21 23:12:57 -07:00
|
|
|
},
|
|
|
|
|
},
|
2011-01-31 13:13:02 -08:00
|
|
|
|
|
|
|
|
{
|
|
|
|
|
"godoc simulation",
|
|
|
|
|
"package main\n\nimport(\n \"rand\"\n ",
|
|
|
|
|
[]string{},
|
|
|
|
|
},
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
|
2011-05-30 18:02:59 +10:00
|
|
|
// find all occurrences of s in source; report at most n occurrences
|
2010-09-21 23:12:57 -07:00
|
|
|
func find(src, s string, n int) []int {
|
2011-08-08 14:32:37 -07:00
|
|
|
var res []int
|
2010-09-21 23:12:57 -07:00
|
|
|
if s != "" && n != 0 {
|
2011-01-04 13:16:50 -08:00
|
|
|
// find at most n occurrences of s in src
|
2010-09-21 23:12:57 -07:00
|
|
|
for i := -1; n < 0 || len(res) < n; {
|
|
|
|
|
j := strings.Index(src[i+1:], s)
|
|
|
|
|
if j < 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
i += j + 1
|
2011-08-08 14:32:37 -07:00
|
|
|
res = append(res, i)
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return res
|
|
|
|
|
}
|
|
|
|
|
|
2010-12-17 14:00:46 -08:00
|
|
|
func testLookup(t *testing.T, tc *testCase, x *Index, s string, n int) {
|
|
|
|
|
res := x.Lookup([]byte(s), n)
|
|
|
|
|
exp := find(tc.source, s, n)
|
2010-09-21 23:12:57 -07:00
|
|
|
|
2010-12-17 14:00:46 -08:00
|
|
|
// check that the lengths match
|
|
|
|
|
if len(res) != len(exp) {
|
|
|
|
|
t.Errorf("test %q, lookup %q (n = %d): expected %d results; got %d", tc.name, s, n, len(exp), len(res))
|
|
|
|
|
}
|
2010-09-21 23:12:57 -07:00
|
|
|
|
2010-12-17 14:00:46 -08:00
|
|
|
// if n >= 0 the number of results is limited --- unless n >= all results,
|
|
|
|
|
// we may obtain different positions from the Index and from find (because
|
|
|
|
|
// Index may not find the results in the same order as find) => in general
|
|
|
|
|
// we cannot simply check that the res and exp lists are equal
|
|
|
|
|
|
|
|
|
|
// check that each result is in fact a correct match and there are no duplicates
|
2011-07-08 10:52:50 +10:00
|
|
|
sort.Ints(res)
|
2010-12-17 14:00:46 -08:00
|
|
|
for i, r := range res {
|
|
|
|
|
if r < 0 || len(tc.source) <= r {
|
|
|
|
|
t.Errorf("test %q, lookup %q, result %d (n = %d): index %d out of range [0, %d[", tc.name, s, i, n, r, len(tc.source))
|
|
|
|
|
} else if !strings.HasPrefix(tc.source[r:], s) {
|
|
|
|
|
t.Errorf("test %q, lookup %q, result %d (n = %d): index %d not a match", tc.name, s, i, n, r)
|
|
|
|
|
}
|
|
|
|
|
if i > 0 && res[i-1] == r {
|
|
|
|
|
t.Errorf("test %q, lookup %q, result %d (n = %d): found duplicate index %d", tc.name, s, i, n, r)
|
|
|
|
|
}
|
|
|
|
|
}
|
2010-09-21 23:12:57 -07:00
|
|
|
|
2010-12-17 14:00:46 -08:00
|
|
|
if n < 0 {
|
|
|
|
|
// all results computed - sorted res and exp must be equal
|
2010-09-21 23:12:57 -07:00
|
|
|
for i, r := range res {
|
2010-12-17 14:00:46 -08:00
|
|
|
e := exp[i]
|
|
|
|
|
if r != e {
|
|
|
|
|
t.Errorf("test %q, lookup %q, result %d: expected index %d; got %d", tc.name, s, i, e, r)
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
2010-12-17 14:00:46 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func testFindAllIndex(t *testing.T, tc *testCase, x *Index, rx *regexp.Regexp, n int) {
|
|
|
|
|
res := x.FindAllIndex(rx, n)
|
|
|
|
|
exp := rx.FindAllStringIndex(tc.source, n)
|
|
|
|
|
|
|
|
|
|
// check that the lengths match
|
|
|
|
|
if len(res) != len(exp) {
|
|
|
|
|
t.Errorf("test %q, FindAllIndex %q (n = %d): expected %d results; got %d", tc.name, rx, n, len(exp), len(res))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if n >= 0 the number of results is limited --- unless n >= all results,
|
|
|
|
|
// we may obtain different positions from the Index and from regexp (because
|
|
|
|
|
// Index may not find the results in the same order as regexp) => in general
|
|
|
|
|
// we cannot simply check that the res and exp lists are equal
|
|
|
|
|
|
|
|
|
|
// check that each result is in fact a correct match and the result is sorted
|
|
|
|
|
for i, r := range res {
|
|
|
|
|
if r[0] < 0 || r[0] > r[1] || len(tc.source) < r[1] {
|
|
|
|
|
t.Errorf("test %q, FindAllIndex %q, result %d (n == %d): illegal match [%d, %d]", tc.name, rx, i, n, r[0], r[1])
|
|
|
|
|
} else if !rx.MatchString(tc.source[r[0]:r[1]]) {
|
|
|
|
|
t.Errorf("test %q, FindAllIndex %q, result %d (n = %d): [%d, %d] not a match", tc.name, rx, i, n, r[0], r[1])
|
|
|
|
|
}
|
|
|
|
|
}
|
2010-09-21 23:12:57 -07:00
|
|
|
|
2010-12-17 14:00:46 -08:00
|
|
|
if n < 0 {
|
|
|
|
|
// all results computed - sorted res and exp must be equal
|
2010-09-21 23:12:57 -07:00
|
|
|
for i, r := range res {
|
2010-12-17 14:00:46 -08:00
|
|
|
e := exp[i]
|
|
|
|
|
if r[0] != e[0] || r[1] != e[1] {
|
|
|
|
|
t.Errorf("test %q, FindAllIndex %q, result %d: expected match [%d, %d]; got [%d, %d]",
|
|
|
|
|
tc.name, rx, i, e[0], e[1], r[0], r[1])
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
2010-12-17 14:00:46 -08:00
|
|
|
}
|
|
|
|
|
}
|
2010-09-21 23:12:57 -07:00
|
|
|
|
2010-12-17 14:00:46 -08:00
|
|
|
func testLookups(t *testing.T, tc *testCase, x *Index, n int) {
|
|
|
|
|
for _, pat := range tc.patterns {
|
|
|
|
|
testLookup(t, tc, x, pat, n)
|
|
|
|
|
if rx, err := regexp.Compile(pat); err == nil {
|
|
|
|
|
testFindAllIndex(t, tc, x, rx, n)
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2011-01-11 21:46:50 -08:00
|
|
|
// index is used to hide the sort.Interface
|
|
|
|
|
type index Index
|
|
|
|
|
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
func (x *index) Len() int { return x.sa.len() }
|
2011-01-11 21:46:50 -08:00
|
|
|
func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
func (x *index) Swap(i, j int) {
|
|
|
|
|
if x.sa.int32 != nil {
|
|
|
|
|
x.sa.int32[i], x.sa.int32[j] = x.sa.int32[j], x.sa.int32[i]
|
|
|
|
|
} else {
|
|
|
|
|
x.sa.int64[i], x.sa.int64[j] = x.sa.int64[j], x.sa.int64[i]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (x *index) at(i int) []byte {
|
|
|
|
|
return x.data[x.sa.get(i):]
|
|
|
|
|
}
|
2011-01-11 21:46:50 -08:00
|
|
|
|
|
|
|
|
func testConstruction(t *testing.T, tc *testCase, x *Index) {
|
|
|
|
|
if !sort.IsSorted((*index)(x)) {
|
2011-09-15 16:21:21 -07:00
|
|
|
t.Errorf("failed testConstruction %s", tc.name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func equal(x, y *Index) bool {
|
|
|
|
|
if !bytes.Equal(x.data, y.data) {
|
|
|
|
|
return false
|
|
|
|
|
}
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
if x.sa.len() != y.sa.len() {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
n := x.sa.len()
|
|
|
|
|
for i := 0; i < n; i++ {
|
|
|
|
|
if x.sa.get(i) != y.sa.get(i) {
|
2011-09-15 16:21:21 -07:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
2011-09-30 11:31:28 -07:00
|
|
|
// returns the serialized index size
|
|
|
|
|
func testSaveRestore(t *testing.T, tc *testCase, x *Index) int {
|
2011-09-15 16:21:21 -07:00
|
|
|
var buf bytes.Buffer
|
|
|
|
|
if err := x.Write(&buf); err != nil {
|
|
|
|
|
t.Errorf("failed writing index %s (%s)", tc.name, err)
|
|
|
|
|
}
|
2011-09-30 11:31:28 -07:00
|
|
|
size := buf.Len()
|
2011-09-15 16:21:21 -07:00
|
|
|
var y Index
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
|
|
|
|
|
t.Errorf("failed reading index %s (%s)", tc.name, err)
|
|
|
|
|
}
|
|
|
|
|
if !equal(x, &y) {
|
|
|
|
|
t.Errorf("restored index doesn't match saved index %s", tc.name)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
old := maxData32
|
|
|
|
|
defer func() {
|
|
|
|
|
maxData32 = old
|
|
|
|
|
}()
|
|
|
|
|
// Reread as forced 32.
|
|
|
|
|
y = Index{}
|
|
|
|
|
maxData32 = realMaxData32
|
|
|
|
|
if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
|
|
|
|
|
t.Errorf("failed reading index %s (%s)", tc.name, err)
|
|
|
|
|
}
|
|
|
|
|
if !equal(x, &y) {
|
|
|
|
|
t.Errorf("restored index doesn't match saved index %s", tc.name)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Reread as forced 64.
|
|
|
|
|
y = Index{}
|
|
|
|
|
maxData32 = -1
|
|
|
|
|
if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
|
2011-09-15 16:21:21 -07:00
|
|
|
t.Errorf("failed reading index %s (%s)", tc.name, err)
|
|
|
|
|
}
|
|
|
|
|
if !equal(x, &y) {
|
|
|
|
|
t.Errorf("restored index doesn't match saved index %s", tc.name)
|
2011-01-11 21:46:50 -08:00
|
|
|
}
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
|
2011-09-30 11:31:28 -07:00
|
|
|
return size
|
2011-01-11 21:46:50 -08:00
|
|
|
}
|
|
|
|
|
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
func testIndex(t *testing.T) {
|
2010-09-21 23:12:57 -07:00
|
|
|
for _, tc := range testCases {
|
|
|
|
|
x := New([]byte(tc.source))
|
2011-01-11 21:46:50 -08:00
|
|
|
testConstruction(t, &tc, x)
|
2011-09-15 16:21:21 -07:00
|
|
|
testSaveRestore(t, &tc, x)
|
2010-12-17 14:00:46 -08:00
|
|
|
testLookups(t, &tc, x, 0)
|
|
|
|
|
testLookups(t, &tc, x, 1)
|
|
|
|
|
testLookups(t, &tc, x, 10)
|
|
|
|
|
testLookups(t, &tc, x, 2e9)
|
|
|
|
|
testLookups(t, &tc, x, -1)
|
2010-09-21 23:12:57 -07:00
|
|
|
}
|
|
|
|
|
}
|
2011-09-20 14:36:19 -07:00
|
|
|
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
func TestIndex32(t *testing.T) {
|
|
|
|
|
testIndex(t)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestIndex64(t *testing.T) {
|
|
|
|
|
maxData32 = -1
|
|
|
|
|
defer func() {
|
|
|
|
|
maxData32 = realMaxData32
|
|
|
|
|
}()
|
|
|
|
|
testIndex(t)
|
|
|
|
|
}
|
|
|
|
|
|
2019-04-26 10:32:15 -04:00
|
|
|
func TestNew32(t *testing.T) {
|
|
|
|
|
test(t, func(x []byte) []int {
|
|
|
|
|
sa := make([]int32, len(x))
|
|
|
|
|
text_32(x, sa)
|
|
|
|
|
out := make([]int, len(sa))
|
|
|
|
|
for i, v := range sa {
|
|
|
|
|
out[i] = int(v)
|
|
|
|
|
}
|
|
|
|
|
return out
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestNew64(t *testing.T) {
|
|
|
|
|
test(t, func(x []byte) []int {
|
|
|
|
|
sa := make([]int64, len(x))
|
|
|
|
|
text_64(x, sa)
|
|
|
|
|
out := make([]int, len(sa))
|
|
|
|
|
for i, v := range sa {
|
|
|
|
|
out[i] = int(v)
|
|
|
|
|
}
|
|
|
|
|
return out
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// test tests an arbitrary suffix array construction function.
|
|
|
|
|
// Generates many inputs, builds and checks suffix arrays.
|
|
|
|
|
func test(t *testing.T, build func([]byte) []int) {
|
|
|
|
|
t.Run("ababab...", func(t *testing.T) {
|
|
|
|
|
// Very repetitive input has numLMS = len(x)/2-1
|
|
|
|
|
// at top level, the largest it can be.
|
|
|
|
|
// But maxID is only two (aba and ab$).
|
|
|
|
|
size := 100000
|
|
|
|
|
if testing.Short() {
|
|
|
|
|
size = 10000
|
|
|
|
|
}
|
|
|
|
|
x := make([]byte, size)
|
|
|
|
|
for i := range x {
|
|
|
|
|
x[i] = "ab"[i%2]
|
|
|
|
|
}
|
|
|
|
|
testSA(t, x, build)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("forcealloc", func(t *testing.T) {
|
|
|
|
|
// Construct a pathological input that forces
|
|
|
|
|
// recurse_32 to allocate a new temporary buffer.
|
|
|
|
|
// The input must have more than N/3 LMS-substrings,
|
|
|
|
|
// which we arrange by repeating an SLSLSLSLSLSL pattern
|
|
|
|
|
// like ababab... above, but then we must also arrange
|
|
|
|
|
// for a large number of distinct LMS-substrings.
|
|
|
|
|
// We use this pattern:
|
|
|
|
|
// 1 255 1 254 1 253 1 ... 1 2 1 255 2 254 2 253 2 252 2 ...
|
|
|
|
|
// This gives approximately 2¹⁵ distinct LMS-substrings.
|
|
|
|
|
// We need to repeat at least one substring, though,
|
|
|
|
|
// or else the recursion can be bypassed entirely.
|
|
|
|
|
x := make([]byte, 100000, 100001)
|
|
|
|
|
lo := byte(1)
|
|
|
|
|
hi := byte(255)
|
|
|
|
|
for i := range x {
|
|
|
|
|
if i%2 == 0 {
|
|
|
|
|
x[i] = lo
|
|
|
|
|
} else {
|
|
|
|
|
x[i] = hi
|
|
|
|
|
hi--
|
|
|
|
|
if hi <= lo {
|
|
|
|
|
lo++
|
|
|
|
|
if lo == 0 {
|
|
|
|
|
lo = 1
|
|
|
|
|
}
|
|
|
|
|
hi = 255
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
x[:cap(x)][len(x)] = 0 // for sais.New
|
|
|
|
|
testSA(t, x, build)
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("exhaustive2", func(t *testing.T) {
|
|
|
|
|
// All inputs over {0,1} up to length 21.
|
|
|
|
|
// Runs in about 10 seconds on my laptop.
|
|
|
|
|
x := make([]byte, 30)
|
|
|
|
|
numFail := 0
|
|
|
|
|
for n := 0; n <= 21; n++ {
|
|
|
|
|
if n > 12 && testing.Short() {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x[n] = 0 // for sais.New
|
|
|
|
|
testRec(t, x[:n], 0, 2, &numFail, build)
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
t.Run("exhaustive3", func(t *testing.T) {
|
|
|
|
|
// All inputs over {0,1,2} up to length 14.
|
|
|
|
|
// Runs in about 10 seconds on my laptop.
|
|
|
|
|
x := make([]byte, 30)
|
|
|
|
|
numFail := 0
|
|
|
|
|
for n := 0; n <= 14; n++ {
|
|
|
|
|
if n > 8 && testing.Short() {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
x[n] = 0 // for sais.New
|
|
|
|
|
testRec(t, x[:n], 0, 3, &numFail, build)
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// testRec fills x[i:] with all possible combinations of values in [1,max]
|
|
|
|
|
// and then calls testSA(t, x, build) for each one.
|
|
|
|
|
func testRec(t *testing.T, x []byte, i, max int, numFail *int, build func([]byte) []int) {
|
|
|
|
|
if i < len(x) {
|
|
|
|
|
for x[i] = 1; x[i] <= byte(max); x[i]++ {
|
|
|
|
|
testRec(t, x, i+1, max, numFail, build)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !testSA(t, x, build) {
|
|
|
|
|
*numFail++
|
|
|
|
|
if *numFail >= 10 {
|
|
|
|
|
t.Errorf("stopping after %d failures", *numFail)
|
|
|
|
|
t.FailNow()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// testSA tests the suffix array build function on the input x.
|
|
|
|
|
// It constructs the suffix array and then checks that it is correct.
|
|
|
|
|
func testSA(t *testing.T, x []byte, build func([]byte) []int) bool {
|
|
|
|
|
defer func() {
|
|
|
|
|
if e := recover(); e != nil {
|
|
|
|
|
t.Logf("build %v", x)
|
|
|
|
|
panic(e)
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
sa := build(x)
|
|
|
|
|
if len(sa) != len(x) {
|
|
|
|
|
t.Errorf("build %v: len(sa) = %d, want %d", x, len(sa), len(x))
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
for i := 0; i+1 < len(sa); i++ {
|
|
|
|
|
if sa[i] < 0 || sa[i] >= len(x) || sa[i+1] < 0 || sa[i+1] >= len(x) {
|
|
|
|
|
t.Errorf("build %s: sa out of range: %v\n", x, sa)
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
if bytes.Compare(x[sa[i]:], x[sa[i+1]:]) >= 0 {
|
|
|
|
|
t.Errorf("build %v -> %v\nsa[%d:] = %d,%d out of order", x, sa, i, sa[i], sa[i+1])
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
var (
|
|
|
|
|
benchdata = make([]byte, 1e6)
|
|
|
|
|
benchrand = make([]byte, 1e6)
|
|
|
|
|
)
|
|
|
|
|
|
2011-09-23 09:18:10 -07:00
|
|
|
// Of all possible inputs, the random bytes have the least amount of substring
|
|
|
|
|
// repetition, and the repeated bytes have the most. For most algorithms,
|
|
|
|
|
// the running time of every input will be between these two.
|
|
|
|
|
func benchmarkNew(b *testing.B, random bool) {
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
b.ReportAllocs()
|
2011-09-23 09:18:10 -07:00
|
|
|
b.StopTimer()
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
data := benchdata
|
2011-09-23 09:18:10 -07:00
|
|
|
if random {
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
data = benchrand
|
|
|
|
|
if data[0] == 0 {
|
|
|
|
|
for i := range data {
|
|
|
|
|
data[i] = byte(rand.Intn(256))
|
|
|
|
|
}
|
2011-09-23 09:18:10 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
b.StartTimer()
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
b.SetBytes(int64(len(data)))
|
2011-09-23 09:18:10 -07:00
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
New(data)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
func makeText(name string) ([]byte, error) {
|
|
|
|
|
var data []byte
|
|
|
|
|
switch name {
|
|
|
|
|
case "opticks":
|
|
|
|
|
var err error
|
|
|
|
|
data, err = ioutil.ReadFile("../../testdata/Isaac.Newton-Opticks.txt")
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
case "go":
|
2020-11-04 18:20:17 -05:00
|
|
|
err := filepath.WalkDir("../..", func(path string, info fs.DirEntry, err error) error {
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
if err == nil && strings.HasSuffix(path, ".go") && !info.IsDir() {
|
|
|
|
|
file, err := ioutil.ReadFile(path)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
data = append(data, file...)
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
})
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
case "zero":
|
|
|
|
|
data = make([]byte, 50e6)
|
|
|
|
|
case "rand":
|
|
|
|
|
data = make([]byte, 50e6)
|
|
|
|
|
for i := range data {
|
|
|
|
|
data[i] = byte(rand.Intn(256))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return data, nil
|
2011-09-23 09:18:10 -07:00
|
|
|
}
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
|
|
|
|
|
func setBits(bits int) (cleanup func()) {
|
|
|
|
|
if bits == 32 {
|
|
|
|
|
maxData32 = realMaxData32
|
|
|
|
|
} else {
|
|
|
|
|
maxData32 = -1 // force use of 64-bit code
|
|
|
|
|
}
|
|
|
|
|
return func() {
|
|
|
|
|
maxData32 = realMaxData32
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func BenchmarkNew(b *testing.B) {
|
|
|
|
|
for _, text := range []string{"opticks", "go", "zero", "rand"} {
|
|
|
|
|
b.Run("text="+text, func(b *testing.B) {
|
|
|
|
|
data, err := makeText(text)
|
|
|
|
|
if err != nil {
|
|
|
|
|
b.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
if testing.Short() && len(data) > 5e6 {
|
|
|
|
|
data = data[:5e6]
|
|
|
|
|
}
|
|
|
|
|
for _, size := range []int{100e3, 500e3, 1e6, 5e6, 10e6, 50e6} {
|
|
|
|
|
if len(data) < size {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
data := data[:size]
|
|
|
|
|
name := fmt.Sprintf("%dK", size/1e3)
|
|
|
|
|
if size >= 1e6 {
|
|
|
|
|
name = fmt.Sprintf("%dM", size/1e6)
|
|
|
|
|
}
|
|
|
|
|
b.Run("size="+name, func(b *testing.B) {
|
|
|
|
|
for _, bits := range []int{32, 64} {
|
|
|
|
|
if ^uint(0) == 0xffffffff && bits == 64 {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
|
|
|
|
|
cleanup := setBits(bits)
|
|
|
|
|
defer cleanup()
|
|
|
|
|
|
|
|
|
|
b.SetBytes(int64(len(data)))
|
|
|
|
|
b.ReportAllocs()
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
New(data)
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
}
|
2011-09-23 09:18:10 -07:00
|
|
|
}
|
|
|
|
|
|
2011-09-20 14:36:19 -07:00
|
|
|
func BenchmarkSaveRestore(b *testing.B) {
|
|
|
|
|
r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence
|
2014-06-24 20:37:28 -07:00
|
|
|
data := make([]byte, 1<<20) // 1MB of data to index
|
2011-09-20 14:36:19 -07:00
|
|
|
for i := range data {
|
|
|
|
|
data[i] = byte(r.Intn(256))
|
|
|
|
|
}
|
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines,
because that's what 'int' meant in Go at the time. When we changed
the meaning of int, that doubled the space overhead of suffix arrays
for all uses, even though the vast majority of them describe less
than 2 GB of text.
The space overhead of a suffix array compared to the text is not
insignificant: there's a big difference for many uses between 4X and 8X.
This CL adjusts names in qsufsort.go so that a global search and
replace s/32/64/g produces a working 64-bit implementation,
and then it modifies suffixarray.go to choose between the 32-bit
and 64-bit implementation as appropriate depending on the input size.
The 64-bit implementation is generated by 'go generate'.
This CL also restructures the benchmarks, to test different
input sizes, different input texts, and 32-bit vs 64-bit.
The serialized form uses varint-encoded numbers and is unchanged,
so on-disk suffix arrays written by older versions of Go will be
readable by this version, and vice versa.
The 32-bit version runs a up to 17% faster than the 64-bit version
on real inputs, but more importantly it uses 50% less memory.
I have a followup CL that also implements a faster algorithm
on top of these improvements, but these are a good first step.
name 64-bit speed 32-bit speed delta
New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5)
New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5)
New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4)
New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5)
New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5)
New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4)
New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4)
New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5)
New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5)
New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5)
New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5)
New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5)
New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5)
New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5)
New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5)
SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10)
name 64-bit alloc/op 32-bit alloc/op delta
New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5)
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4)
New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5)
New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5)
New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5)
New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4)
New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5)
New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5)
New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4)
New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4)
New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4)
New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5)
New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4)
New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4)
New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4)
New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5)
New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5)
New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5)
SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8)
https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32
Fixes #6816.
Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847
Reviewed-on: https://go-review.googlesource.com/c/go/+/174097
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2019-01-25 12:01:53 -05:00
|
|
|
for _, bits := range []int{32, 64} {
|
|
|
|
|
if ^uint(0) == 0xffffffff && bits == 64 {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
|
|
|
|
|
cleanup := setBits(bits)
|
|
|
|
|
defer cleanup()
|
|
|
|
|
|
|
|
|
|
b.StopTimer()
|
|
|
|
|
x := New(data)
|
|
|
|
|
size := testSaveRestore(nil, nil, x) // verify correctness
|
|
|
|
|
buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
|
|
|
|
|
b.SetBytes(int64(size))
|
|
|
|
|
b.StartTimer()
|
|
|
|
|
b.ReportAllocs()
|
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
|
buf.Reset()
|
|
|
|
|
if err := x.Write(buf); err != nil {
|
|
|
|
|
b.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
var y Index
|
|
|
|
|
if err := y.Read(buf); err != nil {
|
|
|
|
|
b.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
})
|
2011-09-20 14:36:19 -07:00
|
|
|
}
|
|
|
|
|
}
|