regexp: avoid allocation of input interface

Matters most for small inputs, because there is no real work
to amortize the allocation effort against.

benchmark                                old ns/op    new ns/op    delta
BenchmarkLiteral                               613          473  -22.84%
BenchmarkNotLiteral                           4981         4931   -1.00%
BenchmarkMatchClass                           7289         7122   -2.29%
BenchmarkMatchClass_InRange                   6618         6663   +0.68%
BenchmarkReplaceAll                           7843         7233   -7.78%
BenchmarkAnchoredLiteralShortNonMatch          329          228  -30.70%
BenchmarkAnchoredLiteralLongNonMatch           322          228  -29.19%
BenchmarkAnchoredShortMatch                    838          715  -14.68%
BenchmarkAnchoredLongMatch                     824          715  -13.23%

benchmark                                 old MB/s     new MB/s  speedup
BenchmarkMatchEasy0_32                      119.73       196.61    1.64x
BenchmarkMatchEasy0_1K                      540.58       538.33    1.00x
BenchmarkMatchEasy0_32K                     732.57       714.00    0.97x
BenchmarkMatchEasy0_1M                      726.44       708.36    0.98x
BenchmarkMatchEasy0_32M                     707.77       691.45    0.98x
BenchmarkMatchEasy1_32                      102.12       136.11    1.33x
BenchmarkMatchEasy1_1K                      298.31       307.04    1.03x
BenchmarkMatchEasy1_32K                     273.56       274.43    1.00x
BenchmarkMatchEasy1_1M                      268.42       269.23    1.00x
BenchmarkMatchEasy1_32M                     266.15       267.34    1.00x
BenchmarkMatchMedium_32                       2.53         3.38    1.34x
BenchmarkMatchMedium_1K                       9.37         9.57    1.02x
BenchmarkMatchMedium_32K                      9.29         9.67    1.04x
BenchmarkMatchMedium_1M                       9.42         9.66    1.03x
BenchmarkMatchMedium_32M                      9.41         9.62    1.02x
BenchmarkMatchHard_32                         6.66         6.75    1.01x
BenchmarkMatchHard_1K                         6.81         6.85    1.01x
BenchmarkMatchHard_32K                        6.79         6.85    1.01x
BenchmarkMatchHard_1M                         6.82         6.83    1.00x
BenchmarkMatchHard_32M                        6.80         6.80    1.00x

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5453076
This commit is contained in:
Russ Cox 2011-12-07 15:03:05 -05:00
parent 3c56a7b17e
commit 2f2cc24cd8
3 changed files with 74 additions and 81 deletions

View file

@ -1,6 +1,9 @@
package regexp
import "regexp/syntax"
import (
"io"
"regexp/syntax"
)
// A queue is a 'sparse array' holding pending threads of execution.
// See http://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
@ -34,6 +37,28 @@ type machine struct {
pool []*thread // pool of available threads
matched bool // whether a match was found
matchcap []int // capture information for the match
// cached inputs, to avoid allocation
inputBytes inputBytes
inputString inputString
inputReader inputReader
}
// newInputBytes stores b in the machine's cached inputBytes value and
// returns a pointer to that cached value as an input, so no new input
// object is allocated for the call.
func (m *machine) newInputBytes(b []byte) input {
	in := &m.inputBytes
	in.str = b
	return in
}
// newInputString stores s in the machine's cached inputString value and
// returns a pointer to that cached value as an input, so no new input
// object is allocated for the call.
func (m *machine) newInputString(s string) input {
	in := &m.inputString
	in.str = s
	return in
}
// newInputReader resets the machine's cached inputReader to read from r,
// clearing its end-of-text flag and position, and returns a pointer to
// that cached value as an input.
func (m *machine) newInputReader(r io.RuneReader) input {
	in := &m.inputReader
	in.pos = 0
	in.atEOT = false
	in.r = r
	return in
}
// progMachine returns a new machine running the prog p.
@ -74,6 +99,9 @@ func (m *machine) alloc(i *syntax.Inst) *thread {
// free returns t to the free pool.
// It also resets the machine's cached inputs to their zero references
// (nil bytes, empty string, nil reader).
// NOTE(review): presumably this is so the machine does not keep the
// caller's data alive between uses; confirm that no caller frees a thread
// while a match is still reading from one of these inputs.
func (m *machine) free(t *thread) {
	m.inputBytes.str, m.inputString.str, m.inputReader.r = nil, "", nil
	m.pool = append(m.pool, t)
}
@ -287,8 +315,16 @@ var empty = make([]int, 0)
// doExecute finds the leftmost match in the input and returns
// the position of its subexpressions.
func (re *Regexp) doExecute(i input, pos int, ncap int) []int {
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int {
m := re.get()
var i input
if r != nil {
i = m.newInputReader(r)
} else if b != nil {
i = m.newInputBytes(b)
} else {
i = m.newInputString(s)
}
m.init(ncap)
if !m.match(i, pos) {
re.put(m)