regexp: avoid allocation of input interface

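The Match and Find entry points now pass the raw reader, byte slice, or
string straight to doExecute instead of first wrapping it in a newly
allocated input value. This matters most for small inputs, where there is
little matching work to amortize the allocation cost against.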

benchmark                                old ns/op    new ns/op    delta
BenchmarkLiteral                               613          473  -22.84%
BenchmarkNotLiteral                           4981         4931   -1.00%
BenchmarkMatchClass                           7289         7122   -2.29%
BenchmarkMatchClass_InRange                   6618         6663   +0.68%
BenchmarkReplaceAll                           7843         7233   -7.78%
BenchmarkAnchoredLiteralShortNonMatch          329          228  -30.70%
BenchmarkAnchoredLiteralLongNonMatch           322          228  -29.19%
BenchmarkAnchoredShortMatch                    838          715  -14.68%
BenchmarkAnchoredLongMatch                     824          715  -13.23%

benchmark                                 old MB/s     new MB/s  speedup
BenchmarkMatchEasy0_32                      119.73       196.61    1.64x
BenchmarkMatchEasy0_1K                      540.58       538.33    1.00x
BenchmarkMatchEasy0_32K                     732.57       714.00    0.97x
BenchmarkMatchEasy0_1M                      726.44       708.36    0.98x
BenchmarkMatchEasy0_32M                     707.77       691.45    0.98x
BenchmarkMatchEasy1_32                      102.12       136.11    1.33x
BenchmarkMatchEasy1_1K                      298.31       307.04    1.03x
BenchmarkMatchEasy1_32K                     273.56       274.43    1.00x
BenchmarkMatchEasy1_1M                      268.42       269.23    1.00x
BenchmarkMatchEasy1_32M                     266.15       267.34    1.00x
BenchmarkMatchMedium_32                       2.53         3.38    1.34x
BenchmarkMatchMedium_1K                       9.37         9.57    1.02x
BenchmarkMatchMedium_32K                      9.29         9.67    1.04x
BenchmarkMatchMedium_1M                       9.42         9.66    1.03x
BenchmarkMatchMedium_32M                      9.41         9.62    1.02x
BenchmarkMatchHard_32                         6.66         6.75    1.01x
BenchmarkMatchHard_1K                         6.81         6.85    1.01x
BenchmarkMatchHard_32K                        6.79         6.85    1.01x
BenchmarkMatchHard_1M                         6.82         6.83    1.00x
BenchmarkMatchHard_32M                        6.80         6.80    1.00x

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5453076
This commit is contained in:
Russ Cox 2011-12-07 15:03:05 -05:00
parent 3c56a7b17e
commit 2f2cc24cd8
3 changed files with 74 additions and 81 deletions

View file

@ -240,10 +240,6 @@ type inputString struct {
str string
}
func newInputString(str string) *inputString {
return &inputString{str: str}
}
func (i *inputString) step(pos int) (rune, int) {
if pos < len(i.str) {
c := i.str[pos]
@ -283,10 +279,6 @@ type inputBytes struct {
str []byte
}
func newInputBytes(str []byte) *inputBytes {
return &inputBytes{str: str}
}
func (i *inputBytes) step(pos int) (rune, int) {
if pos < len(i.str) {
c := i.str[pos]
@ -328,10 +320,6 @@ type inputReader struct {
pos int
}
func newInputReader(r io.RuneReader) *inputReader {
return &inputReader{r: r}
}
func (i *inputReader) step(pos int) (rune, int) {
if !i.atEOT && pos != i.pos {
return endOfText, 0
@ -373,19 +361,19 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
// RuneReader. The return value is a boolean: true for match, false for no
// match.
func (re *Regexp) MatchReader(r io.RuneReader) bool {
return re.doExecute(newInputReader(r), 0, 0) != nil
return re.doExecute(r, nil, "", 0, 0) != nil
}
// MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) MatchString(s string) bool {
return re.doExecute(newInputString(s), 0, 0) != nil
return re.doExecute(nil, nil, s, 0, 0) != nil
}
// Match returns whether the Regexp matches the byte slice b.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) Match(b []byte) bool {
return re.doExecute(newInputBytes(b), 0, 0) != nil
return re.doExecute(nil, b, "", 0, 0) != nil
}
// MatchReader checks whether a textual regular expression matches the text
@ -437,7 +425,7 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str
searchPos := 0 // position where we next look for a match
buf := new(bytes.Buffer)
for searchPos <= len(src) {
a := re.doExecute(newInputString(src), searchPos, 2)
a := re.doExecute(nil, nil, src, searchPos, 2)
if len(a) == 0 {
break // no more matches
}
@ -489,7 +477,7 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
searchPos := 0 // position where we next look for a match
buf := new(bytes.Buffer)
for searchPos <= len(src) {
a := re.doExecute(newInputBytes(src), searchPos, 2)
a := re.doExecute(nil, src, "", searchPos, 2)
if len(a) == 0 {
break // no more matches
}
@ -577,13 +565,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
}
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
var in input
if b == nil {
in = newInputString(s)
} else {
in = newInputBytes(b)
}
matches := re.doExecute(in, pos, re.prog.NumCap)
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
if len(matches) == 0 {
break
}
@ -623,7 +605,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func (re *Regexp) Find(b []byte) []byte {
a := re.doExecute(newInputBytes(b), 0, 2)
a := re.doExecute(nil, b, "", 0, 2)
if a == nil {
return nil
}
@ -635,7 +617,7 @@ func (re *Regexp) Find(b []byte) []byte {
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindIndex(b []byte) (loc []int) {
a := re.doExecute(newInputBytes(b), 0, 2)
a := re.doExecute(nil, b, "", 0, 2)
if a == nil {
return nil
}
@ -648,7 +630,7 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
// necessary to distinguish these cases.
func (re *Regexp) FindString(s string) string {
a := re.doExecute(newInputString(s), 0, 2)
a := re.doExecute(nil, nil, s, 0, 2)
if a == nil {
return ""
}
@ -660,7 +642,7 @@ func (re *Regexp) FindString(s string) string {
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindStringIndex(s string) []int {
a := re.doExecute(newInputString(s), 0, 2)
a := re.doExecute(nil, nil, s, 0, 2)
if a == nil {
return nil
}
@ -672,7 +654,7 @@ func (re *Regexp) FindStringIndex(s string) []int {
// the RuneReader. The match text was found in the input stream at byte
// offset loc[0] through loc[1]-1. A return value of nil indicates no match.
func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
a := re.doExecute(newInputReader(r), 0, 2)
a := re.doExecute(r, nil, "", 0, 2)
if a == nil {
return nil
}
@ -685,7 +667,7 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
// comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
a := re.doExecute(newInputBytes(b), 0, re.prog.NumCap)
a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
if a == nil {
return nil
}
@ -704,7 +686,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
return re.pad(re.doExecute(newInputBytes(b), 0, re.prog.NumCap))
return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
}
// FindStringSubmatch returns a slice of strings holding the text of the
@ -713,7 +695,7 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
// package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatch(s string) []string {
a := re.doExecute(newInputString(s), 0, re.prog.NumCap)
a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
if a == nil {
return nil
}
@ -732,7 +714,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
// 'Index' descriptions in the package comment.
// A return value of nil indicates no match.
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
return re.pad(re.doExecute(newInputString(s), 0, re.prog.NumCap))
return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
}
// FindReaderSubmatchIndex returns a slice holding the index pairs
@ -741,7 +723,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int {
// by the 'Submatch' and 'Index' descriptions in the package comment. A
// return value of nil indicates no match.
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
return re.pad(re.doExecute(newInputReader(r), 0, re.prog.NumCap))
return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
}
const startSize = 10 // The size at which to start a slice in the 'All' routines.