mirror of
https://github.com/golang/go.git
synced 2025-10-29 15:54:14 +00:00
index/suffixarray: add 32-bit implementation
The original index/suffixarray used 32-bit ints on 64-bit machines, because that's what 'int' meant in Go at the time. When we changed the meaning of int, that doubled the space overhead of suffix arrays for all uses, even though the vast majority of them describe less than 2 GB of text. The space overhead of a suffix array compared to the text is not insignificant: there's a big difference for many uses between 4X and 8X. This CL adjusts names in qsufsort.go so that a global search and replace s/32/64/g produces a working 64-bit implementation, and then it modifies suffixarray.go to choose between the 32-bit and 64-bit implementation as appropriate depending on the input size. The 64-bit implementation is generated by 'go generate'. This CL also restructures the benchmarks, to test different input sizes, different input texts, and 32-bit vs 64-bit. The serialized form uses varint-encoded numbers and is unchanged, so on-disk suffix arrays written by older versions of Go will be readable by this version, and vice versa. The 32-bit version runs up to 17% faster than the 64-bit version on real inputs, but more importantly it uses 50% less memory. I have a followup CL that also implements a faster algorithm on top of these improvements, but these are a good first step. 
name 64-bit speed 32-bit speed delta New/text=opticks/size=100K/bits=*-12 4.44MB/s ± 0% 4.64MB/s ± 0% +4.41% (p=0.008 n=5+5) New/text=opticks/size=500K/bits=*-12 3.70MB/s ± 1% 3.82MB/s ± 0% +3.30% (p=0.008 n=5+5) New/text=go/size=100K/bits=*-12 4.40MB/s ± 0% 4.61MB/s ± 0% +4.82% (p=0.008 n=5+5) New/text=go/size=500K/bits=*-12 3.66MB/s ± 0% 3.77MB/s ± 0% +3.01% (p=0.016 n=4+5) New/text=go/size=1M/bits=*-12 3.29MB/s ± 0% 3.55MB/s ± 0% +7.90% (p=0.016 n=5+4) New/text=go/size=5M/bits=*-12 2.25MB/s ± 1% 2.65MB/s ± 0% +17.81% (p=0.008 n=5+5) New/text=go/size=10M/bits=*-12 1.82MB/s ± 0% 2.09MB/s ± 1% +14.36% (p=0.008 n=5+5) New/text=go/size=50M/bits=*-12 1.35MB/s ± 0% 1.51MB/s ± 1% +12.33% (p=0.008 n=5+5) New/text=zero/size=100K/bits=*-12 3.42MB/s ± 0% 3.32MB/s ± 0% -2.74% (p=0.000 n=5+4) New/text=zero/size=500K/bits=*-12 3.00MB/s ± 1% 2.97MB/s ± 0% -1.13% (p=0.016 n=5+4) New/text=zero/size=1M/bits=*-12 2.81MB/s ± 0% 2.78MB/s ± 2% ~ (p=0.167 n=5+5) New/text=zero/size=5M/bits=*-12 2.46MB/s ± 0% 2.53MB/s ± 0% +3.18% (p=0.008 n=5+5) New/text=zero/size=10M/bits=*-12 2.35MB/s ± 0% 2.42MB/s ± 0% +2.98% (p=0.016 n=4+5) New/text=zero/size=50M/bits=*-12 2.12MB/s ± 0% 2.18MB/s ± 0% +3.02% (p=0.008 n=5+5) New/text=rand/size=100K/bits=*-12 6.98MB/s ± 0% 7.22MB/s ± 0% +3.38% (p=0.016 n=4+5) New/text=rand/size=500K/bits=*-12 5.53MB/s ± 0% 5.64MB/s ± 0% +1.92% (p=0.008 n=5+5) New/text=rand/size=1M/bits=*-12 4.62MB/s ± 1% 5.06MB/s ± 0% +9.61% (p=0.008 n=5+5) New/text=rand/size=5M/bits=*-12 3.09MB/s ± 0% 3.43MB/s ± 0% +10.94% (p=0.016 n=4+5) New/text=rand/size=10M/bits=*-12 2.68MB/s ± 0% 2.95MB/s ± 0% +10.39% (p=0.008 n=5+5) New/text=rand/size=50M/bits=*-12 1.92MB/s ± 0% 2.06MB/s ± 1% +7.41% (p=0.008 n=5+5) SaveRestore/bits=*-12 243MB/s ± 1% 259MB/s ± 0% +6.68% (p=0.000 n=9+10) name 64-bit alloc/op 32-bit alloc/op delta New/text=opticks/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4) New/text=opticks/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.008 n=5+5) 
New/text=go/size=100K/bits=*-12 1.62MB ± 0% 0.81MB ± 0% -50.00% (p=0.008 n=5+5) New/text=go/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.029 n=4+4) New/text=go/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.008 n=5+5) New/text=go/size=5M/bits=*-12 80.3MB ± 0% 40.2MB ± 0% ~ (p=0.079 n=4+5) New/text=go/size=10M/bits=*-12 160MB ± 0% 80MB ± 0% -50.00% (p=0.008 n=5+5) New/text=go/size=50M/bits=*-12 805MB ± 0% 402MB ± 0% -50.06% (p=0.029 n=4+4) New/text=zero/size=100K/bits=*-12 3.02MB ± 0% 1.46MB ± 0% ~ (p=0.079 n=4+5) New/text=zero/size=500K/bits=*-12 19.7MB ± 0% 8.7MB ± 0% -55.98% (p=0.008 n=5+5) New/text=zero/size=1M/bits=*-12 39.0MB ± 0% 19.7MB ± 0% -49.60% (p=0.000 n=5+4) New/text=zero/size=5M/bits=*-12 169MB ± 0% 85MB ± 0% -49.46% (p=0.029 n=4+4) New/text=zero/size=10M/bits=*-12 333MB ± 0% 169MB ± 0% -49.43% (p=0.000 n=5+4) New/text=zero/size=50M/bits=*-12 1.63GB ± 0% 0.74GB ± 0% -54.61% (p=0.008 n=5+5) New/text=rand/size=100K/bits=*-12 1.61MB ± 0% 0.81MB ± 0% -50.00% (p=0.000 n=5+4) New/text=rand/size=500K/bits=*-12 8.07MB ± 0% 4.04MB ± 0% -49.89% (p=0.000 n=5+4) New/text=rand/size=1M/bits=*-12 16.1MB ± 0% 8.1MB ± 0% -49.95% (p=0.029 n=4+4) New/text=rand/size=5M/bits=*-12 80.7MB ± 0% 40.3MB ± 0% -50.06% (p=0.008 n=5+5) New/text=rand/size=10M/bits=*-12 161MB ± 0% 81MB ± 0% -50.03% (p=0.008 n=5+5) New/text=rand/size=50M/bits=*-12 806MB ± 0% 403MB ± 0% -50.00% (p=0.016 n=4+5) SaveRestore/bits=*-12 9.47MB ± 0% 5.28MB ± 0% -44.29% (p=0.000 n=9+8) https://perf.golang.org/search?q=upload:20190126.1+|+bits:64+vs+bits:32 Fixes #6816. Change-Id: Ied2fbea519a202ecc43719debcd233344ce38847 Reviewed-on: https://go-review.googlesource.com/c/go/+/174097 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
b098c0f467
commit
45be3530a3
5 changed files with 525 additions and 84 deletions
|
|
@ -6,7 +6,11 @@ package suffixarray
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
|
@ -207,10 +211,19 @@ func testLookups(t *testing.T, tc *testCase, x *Index, n int) {
|
|||
// index is used to hide the sort.Interface
|
||||
type index Index
|
||||
|
||||
func (x *index) Len() int { return len(x.sa) }
|
||||
func (x *index) Len() int { return x.sa.len() }
|
||||
func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
|
||||
func (x *index) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
|
||||
func (a *index) at(i int) []byte { return a.data[a.sa[i]:] }
|
||||
func (x *index) Swap(i, j int) {
|
||||
if x.sa.int32 != nil {
|
||||
x.sa.int32[i], x.sa.int32[j] = x.sa.int32[j], x.sa.int32[i]
|
||||
} else {
|
||||
x.sa.int64[i], x.sa.int64[j] = x.sa.int64[j], x.sa.int64[i]
|
||||
}
|
||||
}
|
||||
|
||||
func (x *index) at(i int) []byte {
|
||||
return x.data[x.sa.get(i):]
|
||||
}
|
||||
|
||||
func testConstruction(t *testing.T, tc *testCase, x *Index) {
|
||||
if !sort.IsSorted((*index)(x)) {
|
||||
|
|
@ -222,8 +235,12 @@ func equal(x, y *Index) bool {
|
|||
if !bytes.Equal(x.data, y.data) {
|
||||
return false
|
||||
}
|
||||
for i, j := range x.sa {
|
||||
if j != y.sa[i] {
|
||||
if x.sa.len() != y.sa.len() {
|
||||
return false
|
||||
}
|
||||
n := x.sa.len()
|
||||
for i := 0; i < n; i++ {
|
||||
if x.sa.get(i) != y.sa.get(i) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
@ -238,16 +255,41 @@ func testSaveRestore(t *testing.T, tc *testCase, x *Index) int {
|
|||
}
|
||||
size := buf.Len()
|
||||
var y Index
|
||||
if err := y.Read(&buf); err != nil {
|
||||
if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
|
||||
t.Errorf("failed reading index %s (%s)", tc.name, err)
|
||||
}
|
||||
if !equal(x, &y) {
|
||||
t.Errorf("restored index doesn't match saved index %s", tc.name)
|
||||
}
|
||||
|
||||
old := maxData32
|
||||
defer func() {
|
||||
maxData32 = old
|
||||
}()
|
||||
// Reread as forced 32.
|
||||
y = Index{}
|
||||
maxData32 = realMaxData32
|
||||
if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
|
||||
t.Errorf("failed reading index %s (%s)", tc.name, err)
|
||||
}
|
||||
if !equal(x, &y) {
|
||||
t.Errorf("restored index doesn't match saved index %s", tc.name)
|
||||
}
|
||||
|
||||
// Reread as forced 64.
|
||||
y = Index{}
|
||||
maxData32 = -1
|
||||
if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
|
||||
t.Errorf("failed reading index %s (%s)", tc.name, err)
|
||||
}
|
||||
if !equal(x, &y) {
|
||||
t.Errorf("restored index doesn't match saved index %s", tc.name)
|
||||
}
|
||||
|
||||
return size
|
||||
}
|
||||
|
||||
func TestIndex(t *testing.T) {
|
||||
func testIndex(t *testing.T) {
|
||||
for _, tc := range testCases {
|
||||
x := New([]byte(tc.source))
|
||||
testConstruction(t, &tc, x)
|
||||
|
|
@ -260,45 +302,162 @@ func TestIndex(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestIndex32(t *testing.T) {
|
||||
testIndex(t)
|
||||
}
|
||||
|
||||
func TestIndex64(t *testing.T) {
|
||||
maxData32 = -1
|
||||
defer func() {
|
||||
maxData32 = realMaxData32
|
||||
}()
|
||||
testIndex(t)
|
||||
}
|
||||
|
||||
var (
|
||||
benchdata = make([]byte, 1e6)
|
||||
benchrand = make([]byte, 1e6)
|
||||
)
|
||||
|
||||
// Of all possible inputs, the random bytes have the least amount of substring
|
||||
// repetition, and the repeated bytes have the most. For most algorithms,
|
||||
// the running time of every input will be between these two.
|
||||
func benchmarkNew(b *testing.B, random bool) {
|
||||
b.ReportAllocs()
|
||||
b.StopTimer()
|
||||
data := make([]byte, 1e6)
|
||||
data := benchdata
|
||||
if random {
|
||||
for i := range data {
|
||||
data[i] = byte(rand.Intn(256))
|
||||
data = benchrand
|
||||
if data[0] == 0 {
|
||||
for i := range data {
|
||||
data[i] = byte(rand.Intn(256))
|
||||
}
|
||||
}
|
||||
}
|
||||
b.StartTimer()
|
||||
b.SetBytes(int64(len(data)))
|
||||
for i := 0; i < b.N; i++ {
|
||||
New(data)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNewIndexRandom(b *testing.B) {
|
||||
benchmarkNew(b, true)
|
||||
// makeText returns the benchmark input text identified by name.
//
// Recognized names:
//
//	"opticks"  contents of ../../testdata/Isaac.Newton-Opticks.txt
//	"go"       concatenation of every regular *.go file found under ../..
//	"zero"     50e6 zero bytes
//	"rand"     50e6 bytes drawn from math/rand's package-level source
//
// Any other name yields an error instead of silently returning empty data,
// so a misspelled text name cannot turn a benchmark into a no-op.
// File-read errors are returned to the caller with a nil slice.
func makeText(name string) ([]byte, error) {
	var data []byte
	switch name {
	case "opticks":
		var err error
		data, err = ioutil.ReadFile("../../testdata/Isaac.Newton-Opticks.txt")
		if err != nil {
			return nil, err
		}
	case "go":
		err := filepath.Walk("../..", func(path string, info os.FileInfo, err error) error {
			// Entries the walk could not stat (err != nil, possibly with a
			// nil info) are skipped rather than aborting the whole walk.
			if err == nil && strings.HasSuffix(path, ".go") && !info.IsDir() {
				file, err := ioutil.ReadFile(path)
				if err != nil {
					return err
				}
				data = append(data, file...)
			}
			return nil
		})
		if err != nil {
			return nil, err
		}
	case "zero":
		data = make([]byte, 50e6)
	case "rand":
		data = make([]byte, 50e6)
		for i := range data {
			data[i] = byte(rand.Intn(256))
		}
	default:
		return nil, fmt.Errorf("makeText: unknown text %q", name)
	}
	return data, nil
}
|
||||
func BenchmarkNewIndexRepeat(b *testing.B) {
|
||||
benchmarkNew(b, false)
|
||||
|
||||
func setBits(bits int) (cleanup func()) {
|
||||
if bits == 32 {
|
||||
maxData32 = realMaxData32
|
||||
} else {
|
||||
maxData32 = -1 // force use of 64-bit code
|
||||
}
|
||||
return func() {
|
||||
maxData32 = realMaxData32
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNew(b *testing.B) {
|
||||
for _, text := range []string{"opticks", "go", "zero", "rand"} {
|
||||
b.Run("text="+text, func(b *testing.B) {
|
||||
data, err := makeText(text)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if testing.Short() && len(data) > 5e6 {
|
||||
data = data[:5e6]
|
||||
}
|
||||
for _, size := range []int{100e3, 500e3, 1e6, 5e6, 10e6, 50e6} {
|
||||
if len(data) < size {
|
||||
continue
|
||||
}
|
||||
data := data[:size]
|
||||
name := fmt.Sprintf("%dK", size/1e3)
|
||||
if size >= 1e6 {
|
||||
name = fmt.Sprintf("%dM", size/1e6)
|
||||
}
|
||||
b.Run("size="+name, func(b *testing.B) {
|
||||
for _, bits := range []int{32, 64} {
|
||||
if ^uint(0) == 0xffffffff && bits == 64 {
|
||||
continue
|
||||
}
|
||||
b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
|
||||
cleanup := setBits(bits)
|
||||
defer cleanup()
|
||||
|
||||
b.SetBytes(int64(len(data)))
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
New(data)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkSaveRestore(b *testing.B) {
|
||||
b.StopTimer()
|
||||
r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence
|
||||
data := make([]byte, 1<<20) // 1MB of data to index
|
||||
for i := range data {
|
||||
data[i] = byte(r.Intn(256))
|
||||
}
|
||||
x := New(data)
|
||||
size := testSaveRestore(nil, nil, x) // verify correctness
|
||||
buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
|
||||
b.SetBytes(int64(size))
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
x.Write(buf)
|
||||
var y Index
|
||||
y.Read(buf)
|
||||
for _, bits := range []int{32, 64} {
|
||||
if ^uint(0) == 0xffffffff && bits == 64 {
|
||||
continue
|
||||
}
|
||||
b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
|
||||
cleanup := setBits(bits)
|
||||
defer cleanup()
|
||||
|
||||
b.StopTimer()
|
||||
x := New(data)
|
||||
size := testSaveRestore(nil, nil, x) // verify correctness
|
||||
buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
|
||||
b.SetBytes(int64(size))
|
||||
b.StartTimer()
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
buf.Reset()
|
||||
if err := x.Write(buf); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
var y Index
|
||||
if err := y.Read(buf); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue