mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
regexp: reduce mallocs in Regexp.Find* and Regexp.ReplaceAll*.
This improves Regexp.Find* and Regexp.ReplaceAll* speed: name old time/op new time/op delta Find-4 345ns ± 1% 314ns ± 1% -8.94% (p=0.000 n=9+8) FindString-4 341ns ± 1% 308ns ± 0% -9.85% (p=0.000 n=10+9) FindSubmatch-4 440ns ± 1% 404ns ± 0% -8.27% (p=0.000 n=10+8) FindStringSubmatch-4 426ns ± 0% 387ns ± 0% -9.07% (p=0.000 n=10+9) ReplaceAll-4 1.75µs ± 1% 1.67µs ± 0% -4.45% (p=0.000 n=9+10) name old alloc/op new alloc/op delta Find-4 16.0B ± 0% 0.0B ±NaN% -100.00% (p=0.000 n=10+10) FindString-4 16.0B ± 0% 0.0B ±NaN% -100.00% (p=0.000 n=10+10) FindSubmatch-4 80.0B ± 0% 48.0B ± 0% -40.00% (p=0.000 n=10+10) FindStringSubmatch-4 64.0B ± 0% 32.0B ± 0% -50.00% (p=0.000 n=10+10) ReplaceAll-4 152B ± 0% 104B ± 0% -31.58% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Find-4 1.00 ± 0% 0.00 ±NaN% -100.00% (p=0.000 n=10+10) FindString-4 1.00 ± 0% 0.00 ±NaN% -100.00% (p=0.000 n=10+10) FindSubmatch-4 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) FindStringSubmatch-4 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.000 n=10+10) ReplaceAll-4 8.00 ± 0% 5.00 ± 0% -37.50% (p=0.000 n=10+10) Fixes #15643 Change-Id: I594fe51172373e2adb98d1d25c76ca2cde54ff48 Reviewed-on: https://go-review.googlesource.com/23030 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
5923df1af9
commit
bea39e63ec
3 changed files with 104 additions and 28 deletions
|
|
@ -564,6 +564,72 @@ func TestSwitchBacktrack(t *testing.T) {
|
||||||
re.Match(long[:1]) // triggers backtracker
|
re.Match(long[:1]) // triggers backtracker
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkFind(b *testing.B) {
|
||||||
|
b.StopTimer()
|
||||||
|
re := MustCompile("a+b+")
|
||||||
|
wantSubs := "aaabb"
|
||||||
|
s := []byte("acbb" + wantSubs + "dd")
|
||||||
|
b.StartTimer()
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
subs := re.Find(s)
|
||||||
|
if string(subs) != wantSubs {
|
||||||
|
b.Fatalf("Find(%q) = %q; want %q", s, subs, wantSubs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindString(b *testing.B) {
|
||||||
|
b.StopTimer()
|
||||||
|
re := MustCompile("a+b+")
|
||||||
|
wantSubs := "aaabb"
|
||||||
|
s := "acbb" + wantSubs + "dd"
|
||||||
|
b.StartTimer()
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
subs := re.FindString(s)
|
||||||
|
if subs != wantSubs {
|
||||||
|
b.Fatalf("FindString(%q) = %q; want %q", s, subs, wantSubs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindSubmatch(b *testing.B) {
|
||||||
|
b.StopTimer()
|
||||||
|
re := MustCompile("a(a+b+)b")
|
||||||
|
wantSubs := "aaabb"
|
||||||
|
s := []byte("acbb" + wantSubs + "dd")
|
||||||
|
b.StartTimer()
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
subs := re.FindSubmatch(s)
|
||||||
|
if string(subs[0]) != wantSubs {
|
||||||
|
b.Fatalf("FindSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
|
||||||
|
}
|
||||||
|
if string(subs[1]) != "aab" {
|
||||||
|
b.Fatalf("FindSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkFindStringSubmatch(b *testing.B) {
|
||||||
|
b.StopTimer()
|
||||||
|
re := MustCompile("a(a+b+)b")
|
||||||
|
wantSubs := "aaabb"
|
||||||
|
s := "acbb" + wantSubs + "dd"
|
||||||
|
b.StartTimer()
|
||||||
|
b.ReportAllocs()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
subs := re.FindStringSubmatch(s)
|
||||||
|
if subs[0] != wantSubs {
|
||||||
|
b.Fatalf("FindStringSubmatch(%q)[0] = %q; want %q", s, subs[0], wantSubs)
|
||||||
|
}
|
||||||
|
if subs[1] != "aab" {
|
||||||
|
b.Fatalf("FindStringSubmatch(%q)[1] = %q; want %q", s, subs[1], "aab")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkLiteral(b *testing.B) {
|
func BenchmarkLiteral(b *testing.B) {
|
||||||
x := strings.Repeat("x", 50) + "y"
|
x := strings.Repeat("x", 50) + "y"
|
||||||
b.StopTimer()
|
b.StopTimer()
|
||||||
|
|
|
||||||
|
|
@ -405,14 +405,16 @@ func (m *machine) onepass(i input, pos int) bool {
|
||||||
return m.matched
|
return m.matched
|
||||||
}
|
}
|
||||||
|
|
||||||
// empty is a non-nil 0-element slice,
|
// doMatch reports whether either r, b or s match the regexp.
|
||||||
// so doExecute can avoid an allocation
|
func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
|
||||||
// when 0 captures are requested from a successful match.
|
return re.doExecute(r, b, s, 0, 0, nil) != nil
|
||||||
var empty = make([]int, 0)
|
}
|
||||||
|
|
||||||
// doExecute finds the leftmost match in the input and returns
|
// doExecute finds the leftmost match in the input, appends the position
|
||||||
// the position of its subexpressions.
|
// of its subexpressions to dstCap and returns dstCap.
|
||||||
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int) []int {
|
//
|
||||||
|
// nil is returned if no matches are found and non-nil if matches are found.
|
||||||
|
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
|
||||||
m := re.get()
|
m := re.get()
|
||||||
var i input
|
var i input
|
||||||
var size int
|
var size int
|
||||||
|
|
@ -445,12 +447,15 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ncap == 0 {
|
dstCap = append(dstCap, m.matchcap...)
|
||||||
re.put(m)
|
if dstCap == nil {
|
||||||
return empty // empty but not nil
|
// Keep the promise of returning non-nil value on match.
|
||||||
|
dstCap = arrayNoInts[:0]
|
||||||
}
|
}
|
||||||
cap := make([]int, len(m.matchcap))
|
|
||||||
copy(cap, m.matchcap)
|
|
||||||
re.put(m)
|
re.put(m)
|
||||||
return cap
|
return dstCap
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// arrayNoInts is returned by doExecute match if nil dstCap is passed
|
||||||
|
// to it with ncap=0.
|
||||||
|
var arrayNoInts [0]int
|
||||||
|
|
|
||||||
|
|
@ -408,17 +408,17 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
|
||||||
// MatchReader reports whether the Regexp matches the text read by the
|
// MatchReader reports whether the Regexp matches the text read by the
|
||||||
// RuneReader.
|
// RuneReader.
|
||||||
func (re *Regexp) MatchReader(r io.RuneReader) bool {
|
func (re *Regexp) MatchReader(r io.RuneReader) bool {
|
||||||
return re.doExecute(r, nil, "", 0, 0) != nil
|
return re.doMatch(r, nil, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchString reports whether the Regexp matches the string s.
|
// MatchString reports whether the Regexp matches the string s.
|
||||||
func (re *Regexp) MatchString(s string) bool {
|
func (re *Regexp) MatchString(s string) bool {
|
||||||
return re.doExecute(nil, nil, s, 0, 0) != nil
|
return re.doMatch(nil, nil, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match reports whether the Regexp matches the byte slice b.
|
// Match reports whether the Regexp matches the byte slice b.
|
||||||
func (re *Regexp) Match(b []byte) bool {
|
func (re *Regexp) Match(b []byte) bool {
|
||||||
return re.doExecute(nil, b, "", 0, 0) != nil
|
return re.doMatch(nil, b, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchReader checks whether a textual regular expression matches the text
|
// MatchReader checks whether a textual regular expression matches the text
|
||||||
|
|
@ -502,8 +502,9 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst
|
||||||
nmatch = re.prog.NumCap
|
nmatch = re.prog.NumCap
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var dstCap [2]int
|
||||||
for searchPos <= endPos {
|
for searchPos <= endPos {
|
||||||
a := re.doExecute(nil, bsrc, src, searchPos, nmatch)
|
a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
|
||||||
if len(a) == 0 {
|
if len(a) == 0 {
|
||||||
break // no more matches
|
break // no more matches
|
||||||
}
|
}
|
||||||
|
|
@ -641,7 +642,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
|
||||||
}
|
}
|
||||||
|
|
||||||
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
|
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
|
||||||
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap)
|
matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
|
||||||
if len(matches) == 0 {
|
if len(matches) == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -681,7 +682,8 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
|
||||||
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
|
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) Find(b []byte) []byte {
|
func (re *Regexp) Find(b []byte) []byte {
|
||||||
a := re.doExecute(nil, b, "", 0, 2)
|
var dstCap [2]int
|
||||||
|
a := re.doExecute(nil, b, "", 0, 2, dstCap[:0])
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -693,7 +695,7 @@ func (re *Regexp) Find(b []byte) []byte {
|
||||||
// b[loc[0]:loc[1]].
|
// b[loc[0]:loc[1]].
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
||||||
a := re.doExecute(nil, b, "", 0, 2)
|
a := re.doExecute(nil, b, "", 0, 2, nil)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -706,7 +708,8 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
||||||
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
||||||
// necessary to distinguish these cases.
|
// necessary to distinguish these cases.
|
||||||
func (re *Regexp) FindString(s string) string {
|
func (re *Regexp) FindString(s string) string {
|
||||||
a := re.doExecute(nil, nil, s, 0, 2)
|
var dstCap [2]int
|
||||||
|
a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0])
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
@ -718,7 +721,7 @@ func (re *Regexp) FindString(s string) string {
|
||||||
// itself is at s[loc[0]:loc[1]].
|
// itself is at s[loc[0]:loc[1]].
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindStringIndex(s string) (loc []int) {
|
func (re *Regexp) FindStringIndex(s string) (loc []int) {
|
||||||
a := re.doExecute(nil, nil, s, 0, 2)
|
a := re.doExecute(nil, nil, s, 0, 2, nil)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -731,7 +734,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
|
||||||
// byte offset loc[0] through loc[1]-1.
|
// byte offset loc[0] through loc[1]-1.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
|
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
|
||||||
a := re.doExecute(r, nil, "", 0, 2)
|
a := re.doExecute(r, nil, "", 0, 2, nil)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -744,7 +747,8 @@ func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
|
||||||
// comment.
|
// comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
||||||
a := re.doExecute(nil, b, "", 0, re.prog.NumCap)
|
var dstCap [4]int
|
||||||
|
a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0])
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -891,7 +895,7 @@ func extract(str string) (name string, num int, rest string, ok bool) {
|
||||||
// in the package comment.
|
// in the package comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
||||||
return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap))
|
return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil))
|
||||||
}
|
}
|
||||||
|
|
||||||
// FindStringSubmatch returns a slice of strings holding the text of the
|
// FindStringSubmatch returns a slice of strings holding the text of the
|
||||||
|
|
@ -900,7 +904,8 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
||||||
// package comment.
|
// package comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindStringSubmatch(s string) []string {
|
func (re *Regexp) FindStringSubmatch(s string) []string {
|
||||||
a := re.doExecute(nil, nil, s, 0, re.prog.NumCap)
|
var dstCap [4]int
|
||||||
|
a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0])
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -919,7 +924,7 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
|
||||||
// 'Index' descriptions in the package comment.
|
// 'Index' descriptions in the package comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
||||||
return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap))
|
return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil))
|
||||||
}
|
}
|
||||||
|
|
||||||
// FindReaderSubmatchIndex returns a slice holding the index pairs
|
// FindReaderSubmatchIndex returns a slice holding the index pairs
|
||||||
|
|
@ -928,7 +933,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
||||||
// by the 'Submatch' and 'Index' descriptions in the package comment. A
|
// by the 'Submatch' and 'Index' descriptions in the package comment. A
|
||||||
// return value of nil indicates no match.
|
// return value of nil indicates no match.
|
||||||
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
|
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
|
||||||
return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap))
|
return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil))
|
||||||
}
|
}
|
||||||
|
|
||||||
const startSize = 10 // The size at which to start a slice in the 'All' routines.
|
const startSize = 10 // The size at which to start a slice in the 'All' routines.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue