regexp: fix performance bug, make anchored searches fail fast.

The bug was that for an anchored pattern such as ^x, the prefix
scan ignored the anchor, and could scan the whole file if there was
no x present.  The fix is to do prefix matching after the anchor;
the cost is minuscule; the speedups are huge.

R=rsc, gri
CC=golang-dev
https://golang.org/cl/3837042
This commit is contained in:
Rob Pike 2011-01-03 11:31:51 -08:00
parent 84fc1e20f1
commit c0d0d4ef05
2 changed files with 74 additions and 7 deletions

View file

@ -377,3 +377,49 @@ func BenchmarkReplaceAll(b *testing.B) {
re.ReplaceAllString(x, "")
}
}
// BenchmarkAnchoredLiteralShortNonMatch times Match for the anchored pattern
// ^zbc(d|e) against the 26-byte lowercase alphabet. The input begins with
// "a", so the literal that follows the anchor is not expected to match.
func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
	// Keep pattern compilation and input setup out of the timed region.
	b.StopTimer()
	pattern := MustCompile("^zbc(d|e)")
	text := []byte("abcdefghijklmnopqrstuvwxyz")
	b.StartTimer()
	for n := b.N; n > 0; n-- {
		pattern.Match(text)
	}
}
// BenchmarkAnchoredLiteralLongNonMatch times Match for the anchored pattern
// ^zbc(d|e) against a large input built by doubling the 26-byte lowercase
// alphabet 15 times (26<<15 bytes). The input begins with "a", so the
// literal that follows the anchor is not expected to match.
func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
	// Keep pattern compilation and input construction out of the timed region.
	b.StopTimer()
	text := []byte("abcdefghijklmnopqrstuvwxyz")
	// Each pass appends the slice to itself, doubling its length.
	for doublings := 15; doublings > 0; doublings-- {
		text = append(text, text...)
	}
	pattern := MustCompile("^zbc(d|e)")
	b.StartTimer()
	for n := b.N; n > 0; n-- {
		pattern.Match(text)
	}
}
// BenchmarkAnchoredShortMatch times Match for the anchored pattern ^.bc(d|e)
// against the 26-byte lowercase alphabet, whose leading "abcd" is the text
// the pattern is aimed at.
func BenchmarkAnchoredShortMatch(b *testing.B) {
	// Keep pattern compilation and input setup out of the timed region.
	b.StopTimer()
	pattern := MustCompile("^.bc(d|e)")
	text := []byte("abcdefghijklmnopqrstuvwxyz")
	b.StartTimer()
	for n := b.N; n > 0; n-- {
		pattern.Match(text)
	}
}
// BenchmarkAnchoredLongMatch times Match for the anchored pattern ^.bc(d|e)
// against a large input built by doubling the 26-byte lowercase alphabet
// 15 times (26<<15 bytes). The input still begins with "abcd", the text the
// pattern is aimed at.
func BenchmarkAnchoredLongMatch(b *testing.B) {
	// Keep pattern compilation and input construction out of the timed region.
	b.StopTimer()
	text := []byte("abcdefghijklmnopqrstuvwxyz")
	// Each pass appends the slice to itself, doubling its length.
	for doublings := 15; doublings > 0; doublings-- {
		text = append(text, text...)
	}
	pattern := MustCompile("^.bc(d|e)")
	b.StartTimer()
	for n := b.N; n > 0; n-- {
		pattern.Match(text)
	}
}