regexp: split bit-state execution out of machine struct

This allows the bit-state executions to have their
own pool of allocated structures. A step toward
eliminating the per-Regexp machine cache.

Note especially the -92% on MatchParallelShared.
This is real but not a complete story: the other
execution engines still need to be de-shared,
but the benchmark was only using bit-state.

The tiny slowdowns in unrelated code are noise.

name                             old time/op    new time/op    delta
Find-12                             264ns ± 3%     254ns ± 0%   -3.86%  (p=0.000 n=10+9)
FindAllNoMatches-12                 140ns ± 2%     135ns ± 0%   -3.91%  (p=0.000 n=10+9)
FindString-12                       256ns ± 0%     247ns ± 0%   -3.52%  (p=0.000 n=8+8)
FindSubmatch-12                     339ns ± 1%     334ns ± 0%   -1.41%  (p=0.000 n=9+10)
FindStringSubmatch-12               322ns ± 0%     321ns ± 0%   -0.21%  (p=0.005 n=8+9)
Literal-12                          100ns ± 2%      92ns ± 0%   -8.10%  (p=0.000 n=10+9)
NotLiteral-12                      1.50µs ± 0%    1.47µs ± 0%   -1.91%  (p=0.000 n=8+9)
MatchClass-12                      2.18µs ± 0%    2.17µs ± 0%   -0.20%  (p=0.001 n=10+7)
MatchClass_InRange-12              2.12µs ± 0%    2.13µs ± 0%   +0.23%  (p=0.000 n=10+10)
ReplaceAll-12                      1.41µs ± 0%    1.39µs ± 0%   -1.30%  (p=0.000 n=7+10)
AnchoredLiteralShortNonMatch-12    89.8ns ± 0%    83.2ns ± 0%   -7.35%  (p=0.000 n=8+8)
AnchoredLiteralLongNonMatch-12      105ns ± 3%     105ns ± 0%     ~     (p=0.186 n=10+10)
AnchoredShortMatch-12               141ns ± 0%     131ns ± 0%   -7.09%  (p=0.000 n=9+10)
AnchoredLongMatch-12                276ns ± 4%     267ns ± 0%   -3.23%  (p=0.000 n=10+10)
OnePassShortA-12                    620ns ± 0%     611ns ± 0%   -1.39%  (p=0.000 n=10+9)
NotOnePassShortA-12                 575ns ± 3%     552ns ± 0%   -3.97%  (p=0.000 n=10+8)
OnePassShortB-12                    493ns ± 0%     491ns ± 0%   -0.33%  (p=0.000 n=8+8)
NotOnePassShortB-12                 423ns ± 0%     412ns ± 0%   -2.60%  (p=0.000 n=8+9)
OnePassLongPrefix-12                112ns ± 0%     112ns ± 0%     ~     (all equal)
OnePassLongNotPrefix-12             405ns ± 0%     410ns ± 0%   +1.23%  (p=0.000 n=8+9)
MatchParallelShared-12              501ns ± 1%      39ns ± 1%  -92.27%  (p=0.000 n=10+10)
MatchParallelCopied-12             39.1ns ± 0%    39.2ns ± 3%     ~     (p=0.785 n=6+10)
QuoteMetaAll-12                    94.6ns ± 0%    94.6ns ± 0%     ~     (p=0.439 n=10+8)
QuoteMetaNone-12                   52.7ns ± 0%    52.7ns ± 0%     ~     (all equal)
Match/Easy0/32-12                  79.1ns ± 0%    72.9ns ± 0%   -7.85%  (p=0.000 n=9+9)
Match/Easy0/1K-12                   307ns ± 1%     298ns ± 0%   -2.99%  (p=0.000 n=10+6)
Match/Easy0/32K-12                 4.65µs ± 2%    4.60µs ± 2%     ~     (p=0.159 n=10+10)
Match/Easy0/1M-12                   234µs ± 0%     235µs ± 0%   +0.17%  (p=0.003 n=10+10)
Match/Easy0/32M-12                 7.98ms ± 1%    7.96ms ± 0%     ~     (p=0.278 n=9+10)
Match/Easy0i/32-12                 1.13µs ± 1%    1.09µs ± 0%   -3.24%  (p=0.000 n=9+8)
Match/Easy0i/1K-12                 32.5µs ± 0%    31.7µs ± 0%   -2.66%  (p=0.000 n=9+9)
Match/Easy0i/32K-12                1.59ms ± 0%    1.61ms ± 0%   +0.75%  (p=0.000 n=9+9)
Match/Easy0i/1M-12                 51.0ms ± 0%    51.4ms ± 0%   +0.77%  (p=0.000 n=10+8)
Match/Easy0i/32M-12                 1.63s ± 0%     1.65s ± 1%   +1.24%  (p=0.000 n=7+9)
Match/Easy1/32-12                  75.1ns ± 1%    67.9ns ± 0%   -9.54%  (p=0.000 n=8+8)
Match/Easy1/1K-12                   861ns ± 0%     884ns ± 0%   +2.71%  (p=0.000 n=8+9)
Match/Easy1/32K-12                 39.2µs ± 1%    39.2µs ± 0%     ~     (p=0.090 n=10+9)
Match/Easy1/1M-12                  1.38ms ± 0%    1.39ms ± 0%     ~     (p=0.095 n=10+9)
Match/Easy1/32M-12                 44.2ms ± 1%    44.2ms ± 1%     ~     (p=0.218 n=10+10)
Match/Medium/32-12                 1.04µs ± 1%    1.05µs ± 0%   +1.05%  (p=0.000 n=9+8)
Match/Medium/1K-12                 31.3µs ± 0%    31.3µs ± 0%   -0.14%  (p=0.004 n=9+9)
Match/Medium/32K-12                1.44ms ± 0%    1.45ms ± 0%   +0.18%  (p=0.001 n=8+8)
Match/Medium/1M-12                 46.1ms ± 0%    46.2ms ± 0%   +0.13%  (p=0.003 n=6+9)
Match/Medium/32M-12                 1.48s ± 0%     1.48s ± 0%   +0.20%  (p=0.002 n=9+8)
Match/Hard/32-12                   1.54µs ± 1%    1.49µs ± 0%   -3.60%  (p=0.000 n=9+10)
Match/Hard/1K-12                   46.4µs ± 1%    45.1µs ± 1%   -2.78%  (p=0.000 n=9+10)
Match/Hard/32K-12                  2.19ms ± 0%    2.18ms ± 1%   -0.51%  (p=0.006 n=8+9)
Match/Hard/1M-12                   70.1ms ± 0%    69.7ms ± 1%   -0.52%  (p=0.006 n=8+9)
Match/Hard/32M-12                   2.24s ± 0%     2.23s ± 1%   -0.42%  (p=0.046 n=8+9)
Match/Hard1/32-12                  8.17µs ± 1%    7.89µs ± 0%   -3.42%  (p=0.000 n=8+9)
Match/Hard1/1K-12                   254µs ± 2%     244µs ± 0%   -3.91%  (p=0.000 n=9+9)
Match/Hard1/32K-12                 9.58ms ± 1%   10.35ms ± 0%   +8.00%  (p=0.000 n=10+10)
Match/Hard1/1M-12                   306ms ± 1%     331ms ± 0%   +8.27%  (p=0.000 n=9+8)
Match/Hard1/32M-12                  9.79s ± 1%    10.60s ± 0%   +8.29%  (p=0.000 n=9+8)
Match_onepass_regex/32-12           808ns ± 0%     812ns ± 0%   +0.47%  (p=0.000 n=8+10)
Match_onepass_regex/1K-12          27.8µs ± 0%    28.5µs ± 0%   +2.32%  (p=0.000 n=8+10)
Match_onepass_regex/32K-12          925µs ± 0%     936µs ± 0%   +1.24%  (p=0.000 n=9+10)
Match_onepass_regex/1M-12          29.5ms ± 0%    30.2ms ± 0%   +2.38%  (p=0.000 n=10+10)
Match_onepass_regex/32M-12          945ms ± 0%     970ms ± 0%   +2.60%  (p=0.000 n=9+10)
CompileOnepass-12                  4.67µs ± 0%    4.63µs ± 1%   -0.84%  (p=0.000 n=10+10)
[Geo mean]                         24.5µs         23.3µs        -5.04%

https://perf.golang.org/search?q=upload:20181004.1

Change-Id: Idbc2b76223718265657819ff38be2d9aba1c54b4
Reviewed-on: https://go-review.googlesource.com/c/139779
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Russ Cox 2018-09-28 16:37:16 -04:00
parent 8e0aea162b
commit 2d4346b319
3 changed files with 162 additions and 139 deletions

View file

@ -35,37 +35,61 @@ type thread struct {
// A machine holds all the state during an NFA simulation for p.
type machine struct {
re *Regexp // corresponding Regexp
p *syntax.Prog // compiled program
op *onePassProg // compiled onepass program, or notOnePass
maxBitStateLen int // max length of string to search with bitstate
b *bitState // state for backtracker, allocated lazily
q0, q1 queue // two queues for runq, nextq
pool []*thread // pool of available threads
matched bool // whether a match was found
matchcap []int // capture information for the match
re *Regexp // corresponding Regexp
p *syntax.Prog // compiled program
op *onePassProg // compiled onepass program, or notOnePass
q0, q1 queue // two queues for runq, nextq
pool []*thread // pool of available threads
matched bool // whether a match was found
matchcap []int // capture information for the match
inputs inputs
}
type inputs struct {
// cached inputs, to avoid allocation
inputBytes inputBytes
inputString inputString
inputReader inputReader
bytes inputBytes
string inputString
reader inputReader
}
func (m *machine) newInputBytes(b []byte) input {
m.inputBytes.str = b
return &m.inputBytes
func (i *inputs) newBytes(b []byte) input {
i.bytes.str = b
return &i.bytes
}
func (m *machine) newInputString(s string) input {
m.inputString.str = s
return &m.inputString
func (i *inputs) newString(s string) input {
i.string.str = s
return &i.string
}
func (m *machine) newInputReader(r io.RuneReader) input {
m.inputReader.r = r
m.inputReader.atEOT = false
m.inputReader.pos = 0
return &m.inputReader
func (i *inputs) newReader(r io.RuneReader) input {
i.reader.r = r
i.reader.atEOT = false
i.reader.pos = 0
return &i.reader
}
func (i *inputs) clear() {
// We need to clear 1 of these.
// Avoid the expense of clearing the others (pointer write barrier).
if i.bytes.str != nil {
i.bytes.str = nil
} else if i.reader.r != nil {
i.reader.r = nil
} else {
i.string.str = ""
}
}
func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
if r != nil {
return i.newReader(r), 0
}
if b != nil {
return i.newBytes(b), len(b)
}
return i.newString(s), len(s)
}
// progMachine returns a new machine running the prog p.
@ -78,9 +102,6 @@ func progMachine(p *syntax.Prog, op *onePassProg) *machine {
if ncap < 2 {
ncap = 2
}
if op == notOnePass {
m.maxBitStateLen = maxBitStateLen(p)
}
m.matchcap = make([]int, ncap)
return m
}
@ -416,31 +437,23 @@ func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
//
// nil is returned if no matches are found and non-nil if matches are found.
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
m := re.get()
var i input
var size int
if r != nil {
i = m.newInputReader(r)
} else if b != nil {
i = m.newInputBytes(b)
size = len(b)
} else {
i = m.newInputString(s)
size = len(s)
if dstCap == nil {
// Make sure 'return dstCap' is non-nil.
dstCap = arrayNoInts[:0:0]
}
if re.onepass == notOnePass && r == nil && len(b)+len(s) < re.maxBitStateLen {
return re.backtrack(b, s, pos, ncap, dstCap)
}
m := re.get()
i, _ := m.inputs.init(r, b, s)
if m.op != notOnePass {
if !m.onepass(i, pos, ncap) {
re.put(m)
return nil
}
} else if size < m.maxBitStateLen && r == nil {
if m.b == nil {
m.b = newBitState(m.p)
}
if !m.backtrack(i, pos, size, ncap) {
re.put(m)
return nil
}
} else {
m.init(ncap)
if !m.match(i, pos) {
@ -449,10 +462,6 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
}
}
dstCap = append(dstCap, m.matchcap...)
if dstCap == nil {
// Keep the promise of returning non-nil value on match.
dstCap = arrayNoInts[:0]
}
re.put(m)
return dstCap
}