mirror of
				https://github.com/golang/go.git
				synced 2025-10-31 00:30:57 +00:00 
			
		
		
		
	suffixarray: faster creation algorithm
This implements the algorithm qsufsort using the sort package
as a sorting primitive. Its worst-case performance is O(N*log(N)), and it
uses only an additional slice of N ints of memory during creation.
Benchmarks (seconds):
           old    new
10k nulls          149    0.044
1M English corpus  32.0   3.6
R=gri, gri1
CC=golang-dev
https://golang.org/cl/3752044
			
			
This commit is contained in:
		
							parent
							
								
									6f62e40773
								
							
						
					
					
						commit
						e3c9565b43
					
				
					 4 changed files with 185 additions and 23 deletions
				
			
		|  | @ -5,6 +5,7 @@ | |||
| package suffixarray | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"container/vector" | ||||
| 	"regexp" | ||||
| 	"sort" | ||||
|  | @ -204,9 +205,26 @@ func testLookups(t *testing.T, tc *testCase, x *Index, n int) { | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| // index is used to hide the sort.Interface | ||||
| type index Index | ||||
| 
 | ||||
| func (x *index) Len() int           { return len(x.sa) } | ||||
| func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 } | ||||
| func (x *index) Swap(i, j int)      { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] } | ||||
| func (a *index) at(i int) []byte    { return a.data[a.sa[i]:] } | ||||
| 
 | ||||
| 
 | ||||
| func testConstruction(t *testing.T, tc *testCase, x *Index) { | ||||
| 	if !sort.IsSorted((*index)(x)) { | ||||
| 		t.Errorf("testConstruction failed %s", tc.name) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| func TestIndex(t *testing.T) { | ||||
| 	for _, tc := range testCases { | ||||
| 		x := New([]byte(tc.source)) | ||||
| 		testConstruction(t, &tc, x) | ||||
| 		testLookups(t, &tc, x, 0) | ||||
| 		testLookups(t, &tc, x, 1) | ||||
| 		testLookups(t, &tc, x, 10) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Eric Eisner
						Eric Eisner