Optimization to regexp _CharClass: keep track of overall range of
charclass to avoid unnecessarily iterating over ranges.
    Also, use the fact that IntVector is an []int to avoid method calls.
    On my machine, this brings us from ~27500 ns/op to ~17500 ns/op in the benchmark I've added. (It is also faster in the case where a range check
    doesn't help; I've added a benchmark for this too.)

    I'd also like to propose that "[]" and "[^]" be disallowed. They aren't useful as far as I can tell, they aren't widely supported, and they make reasoning about character classes a bit more complicated.

R=r
CC=golang-dev
https://golang.org/cl/1495041
This commit is contained in:
Kyle Consalus 2010-06-02 23:04:44 -07:00 committed by Rob Pike
parent 3d4c97b82c
commit aae02a1855
2 changed files with 48 additions and 10 deletions

View file

@ -519,7 +519,7 @@ var numSubexpCases = []numSubexpCase{
func TestNumSubexp(t *testing.T) {
for _, c := range numSubexpCases {
re, _ := Compile(c.input)
re := MustCompile(c.input)
n := re.NumSubexp()
if n != c.expected {
t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected)
@ -530,7 +530,7 @@ func TestNumSubexp(t *testing.T) {
func BenchmarkLiteral(b *testing.B) {
x := strings.Repeat("x", 50)
b.StopTimer()
re, _ := Compile(x)
re := MustCompile(x)
b.StartTimer()
for i := 0; i < b.N; i++ {
if !re.MatchString(x) {
@ -543,7 +543,35 @@ func BenchmarkLiteral(b *testing.B) {
func BenchmarkNotLiteral(b *testing.B) {
x := strings.Repeat("x", 49)
b.StopTimer()
re, _ := Compile("^" + x)
re := MustCompile("^" + x)
b.StartTimer()
for i := 0; i < b.N; i++ {
if !re.MatchString(x) {
println("no match!")
break
}
}
}
// BenchmarkMatchClass times MatchString for the character class "[abcdw]"
// against an input of 80 'x' characters followed by a single 'w'.
// Every 'x' is above the class's highest member ('w'), so only the final
// character can match; presumably this is the case the overall-range check
// described in the commit message is meant to speed up.
// Building the input and compiling the pattern happen while the timer is
// stopped, so only the match loop is measured.
func BenchmarkMatchClass(b *testing.B) {
b.StopTimer()
x := strings.Repeat("xxxx", 20) + "w"
re := MustCompile("[abcdw]")
b.StartTimer()
for i := 0; i < b.N; i++ {
// A failed match means the regexp is broken: report it and stop iterating.
if !re.MatchString(x) {
println("no match!")
break
}
}
}
func BenchmarkMatchClass_InRange(b *testing.B) {
b.StopTimer()
// 'b' is between 'a' and 'c', so the charclass
// range checking is no help here.
x := strings.Repeat("bbbb", 20) + "c"
re := MustCompile("[ac]")
b.StartTimer()
for i := 0; i < b.N; i++ {
if !re.MatchString(x) {
@ -556,7 +584,7 @@ func BenchmarkNotLiteral(b *testing.B) {
func BenchmarkReplaceAll(b *testing.B) {
x := "abcdefghijklmnopqrstuvwxyz"
b.StopTimer()
re, _ := Compile("[cjrw]")
re := MustCompile("[cjrw]")
b.StartTimer()
for i := 0; i < b.N; i++ {
re.ReplaceAllString(x, "")