regexp: split one-pass execution out of machine struct

This allows the one-pass executions to have their
own pool of (much smaller) allocated structures.
A step toward eliminating the per-Regexp machine cache.

Not much effect on benchmarks, since there are no
optimizations here, and pools are a tiny bit slower than a
locked data structure for single-threaded code.

name                             old time/op    new time/op    delta
Find-12                             254ns ± 0%     252ns ± 0%  -0.94%  (p=0.000 n=9+10)
FindAllNoMatches-12                 135ns ± 0%     134ns ± 1%  -0.49%  (p=0.002 n=9+9)
FindString-12                       247ns ± 0%     246ns ± 0%  -0.24%  (p=0.003 n=8+10)
FindSubmatch-12                     334ns ± 0%     333ns ± 2%    ~     (p=0.283 n=10+10)
FindStringSubmatch-12               321ns ± 0%     320ns ± 0%  -0.51%  (p=0.000 n=9+10)
Literal-12                         92.2ns ± 0%    91.1ns ± 0%  -1.25%  (p=0.000 n=9+10)
NotLiteral-12                      1.47µs ± 0%    1.45µs ± 0%  -0.99%  (p=0.000 n=9+10)
MatchClass-12                      2.17µs ± 0%    2.19µs ± 0%  +0.84%  (p=0.000 n=7+9)
MatchClass_InRange-12              2.13µs ± 0%    2.09µs ± 0%  -1.70%  (p=0.000 n=10+10)
ReplaceAll-12                      1.39µs ± 0%    1.39µs ± 0%  +0.51%  (p=0.000 n=10+10)
AnchoredLiteralShortNonMatch-12    83.2ns ± 0%    82.4ns ± 0%  -0.96%  (p=0.000 n=8+8)
AnchoredLiteralLongNonMatch-12      105ns ± 0%     106ns ± 1%    ~     (p=0.087 n=10+10)
AnchoredShortMatch-12               131ns ± 0%     130ns ± 0%  -0.76%  (p=0.000 n=10+9)
AnchoredLongMatch-12                267ns ± 0%     272ns ± 0%  +2.01%  (p=0.000 n=10+8)
OnePassShortA-12                    611ns ± 0%     615ns ± 0%  +0.61%  (p=0.000 n=9+10)
NotOnePassShortA-12                 552ns ± 0%     549ns ± 0%  -0.46%  (p=0.000 n=8+9)
OnePassShortB-12                    491ns ± 0%     494ns ± 0%  +0.61%  (p=0.000 n=8+8)
NotOnePassShortB-12                 412ns ± 0%     412ns ± 1%    ~     (p=0.151 n=9+10)
OnePassLongPrefix-12                112ns ± 0%     108ns ± 0%  -3.57%  (p=0.000 n=10+10)
OnePassLongNotPrefix-12             410ns ± 0%     402ns ± 0%  -1.95%  (p=0.000 n=9+8)
MatchParallelShared-12             38.8ns ± 1%    38.6ns ± 2%    ~     (p=0.536 n=10+9)
MatchParallelCopied-12             39.2ns ± 3%    39.4ns ± 7%    ~     (p=0.986 n=10+10)
QuoteMetaAll-12                    94.6ns ± 0%    94.9ns ± 0%  +0.29%  (p=0.001 n=8+9)
QuoteMetaNone-12                   52.7ns ± 0%    52.7ns ± 0%    ~     (all equal)
Match/Easy0/32-12                  72.9ns ± 0%    72.1ns ± 0%  -1.07%  (p=0.000 n=9+9)
Match/Easy0/1K-12                   298ns ± 0%     298ns ± 0%    ~     (p=0.140 n=6+8)
Match/Easy0/32K-12                 4.60µs ± 2%    4.64µs ± 1%    ~     (p=0.171 n=10+10)
Match/Easy0/1M-12                   235µs ± 0%     234µs ± 0%  -0.14%  (p=0.004 n=10+10)
Match/Easy0/32M-12                 7.96ms ± 0%    7.95ms ± 0%  -0.12%  (p=0.043 n=10+9)
Match/Easy0i/32-12                 1.09µs ± 0%    1.10µs ± 0%  +0.15%  (p=0.000 n=8+9)
Match/Easy0i/1K-12                 31.7µs ± 0%    31.8µs ± 1%    ~     (p=0.905 n=9+10)
Match/Easy0i/32K-12                1.61ms ± 0%    1.62ms ± 1%  +1.12%  (p=0.000 n=9+10)
Match/Easy0i/1M-12                 51.4ms ± 0%    51.8ms ± 0%  +0.85%  (p=0.000 n=8+8)
Match/Easy0i/32M-12                 1.65s ± 1%     1.65s ± 0%    ~     (p=0.113 n=9+9)
Match/Easy1/32-12                  67.9ns ± 0%    67.7ns ± 1%    ~     (p=0.232 n=8+10)
Match/Easy1/1K-12                   884ns ± 0%     873ns ± 0%  -1.29%  (p=0.000 n=9+10)
Match/Easy1/32K-12                 39.2µs ± 0%    39.4µs ± 0%  +0.50%  (p=0.000 n=9+10)
Match/Easy1/1M-12                  1.39ms ± 0%    1.39ms ± 0%  +0.29%  (p=0.000 n=9+10)
Match/Easy1/32M-12                 44.2ms ± 1%    44.3ms ± 0%  +0.21%  (p=0.029 n=10+10)
Match/Medium/32-12                 1.05µs ± 0%    1.04µs ± 0%  -0.27%  (p=0.001 n=8+9)
Match/Medium/1K-12                 31.3µs ± 0%    31.4µs ± 0%  +0.39%  (p=0.000 n=9+8)
Match/Medium/32K-12                1.45ms ± 0%    1.45ms ± 0%  +0.33%  (p=0.000 n=8+9)
Match/Medium/1M-12                 46.2ms ± 0%    46.4ms ± 0%  +0.35%  (p=0.000 n=9+8)
Match/Medium/32M-12                 1.48s ± 0%     1.49s ± 1%  +0.70%  (p=0.000 n=8+10)
Match/Hard/32-12                   1.49µs ± 0%    1.48µs ± 0%  -0.43%  (p=0.000 n=10+9)
Match/Hard/1K-12                   45.1µs ± 1%    45.0µs ± 1%    ~     (p=0.393 n=10+10)
Match/Hard/32K-12                  2.18ms ± 1%    2.24ms ± 0%  +2.71%  (p=0.000 n=9+8)
Match/Hard/1M-12                   69.7ms ± 1%    71.6ms ± 0%  +2.76%  (p=0.000 n=9+7)
Match/Hard/32M-12                   2.23s ± 1%     2.29s ± 0%  +2.65%  (p=0.000 n=9+9)
Match/Hard1/32-12                  7.89µs ± 0%    7.89µs ± 0%    ~     (p=0.286 n=9+9)
Match/Hard1/1K-12                   244µs ± 0%     244µs ± 0%    ~     (p=0.905 n=9+10)
Match/Hard1/32K-12                 10.3ms ± 0%    10.3ms ± 0%    ~     (p=0.796 n=10+10)
Match/Hard1/1M-12                   331ms ± 0%     331ms ± 0%    ~     (p=0.167 n=8+9)
Match/Hard1/32M-12                  10.6s ± 0%     10.6s ± 0%    ~     (p=0.315 n=8+10)
Match_onepass_regex/32-12           812ns ± 0%     830ns ± 0%  +2.19%  (p=0.000 n=10+9)
Match_onepass_regex/1K-12          28.5µs ± 0%    28.7µs ± 1%  +0.97%  (p=0.000 n=10+9)
Match_onepass_regex/32K-12          936µs ± 0%     949µs ± 0%  +1.43%  (p=0.000 n=10+8)
Match_onepass_regex/1M-12          30.2ms ± 0%    30.4ms ± 0%  +0.62%  (p=0.000 n=10+8)
Match_onepass_regex/32M-12          970ms ± 0%     973ms ± 0%  +0.35%  (p=0.000 n=10+9)
CompileOnepass-12                  4.63µs ± 1%    4.64µs ± 0%    ~     (p=0.060 n=10+10)
[Geo mean]                         23.3µs         23.3µs       +0.12%

https://perf.golang.org/search?q=upload:20181004.2

Change-Id: Iff9e9f9d4a4698162126a2f300e8ed1b1a39361e
Reviewed-on: https://go-review.googlesource.com/c/139780
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Russ Cox 2018-10-02 09:29:47 -04:00
parent 2d4346b319
commit 60b2971180
6 changed files with 132 additions and 103 deletions

View file

@ -7,6 +7,7 @@ package regexp
import (
"io"
"regexp/syntax"
"sync"
)
// A queue is a 'sparse array' holding pending threads of execution.
@ -37,7 +38,6 @@ type thread struct {
type machine struct {
re *Regexp // corresponding Regexp
p *syntax.Prog // compiled program
op *onePassProg // compiled onepass program, or notOnePass
q0, q1 queue // two queues for runq, nextq
pool []*thread // pool of available threads
matched bool // whether a match was found
@ -93,8 +93,8 @@ func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
}
// progMachine returns a new machine running the prog p.
func progMachine(p *syntax.Prog, op *onePassProg) *machine {
m := &machine{p: p, op: op}
func progMachine(p *syntax.Prog) *machine {
m := &machine{p: p}
n := len(m.p.Inst)
m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
m.q1 = queue{make([]uint32, n), make([]entry, 0, n)}
@ -327,20 +327,47 @@ func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond syntax.Empty
return t
}
// onepass runs the machine over the input starting at pos.
// It reports whether a match was found.
// If so, m.matchcap holds the submatch information.
// ncap is the number of captures.
func (m *machine) onepass(i input, pos, ncap int) bool {
startCond := m.re.cond
if startCond == ^syntax.EmptyOp(0) { // impossible
return false
type onePassMachine struct {
inputs inputs
matchcap []int
}
var onePassPool sync.Pool
func newOnePassMachine() *onePassMachine {
m, ok := onePassPool.Get().(*onePassMachine)
if !ok {
m = new(onePassMachine)
}
m.matched = false
m.matchcap = m.matchcap[:ncap]
return m
}
func freeOnePassMachine(m *onePassMachine) {
m.inputs.clear()
onePassPool.Put(m)
}
// doOnePass implements r.doExecute using the one-pass execution engine.
func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int {
startCond := re.cond
if startCond == ^syntax.EmptyOp(0) { // impossible
return nil
}
m := newOnePassMachine()
if cap(m.matchcap) < ncap {
m.matchcap = make([]int, ncap)
} else {
m.matchcap = m.matchcap[:ncap]
}
matched := false
for i := range m.matchcap {
m.matchcap[i] = -1
}
i, _ := m.inputs.init(ir, ib, is)
r, r1 := endOfText, endOfText
width, width1 := 0, 0
r, width = i.step(pos)
@ -353,59 +380,59 @@ func (m *machine) onepass(i input, pos, ncap int) bool {
} else {
flag = i.context(pos)
}
pc := m.op.Start
inst := m.op.Inst[pc]
pc := re.onepass.Start
inst := re.onepass.Inst[pc]
// If there is a simple literal prefix, skip over it.
if pos == 0 && syntax.EmptyOp(inst.Arg)&^flag == 0 &&
len(m.re.prefix) > 0 && i.canCheckPrefix() {
len(re.prefix) > 0 && i.canCheckPrefix() {
// Match requires literal prefix; fast search for it.
if !i.hasPrefix(m.re) {
return m.matched
if !i.hasPrefix(re) {
goto Return
}
pos += len(m.re.prefix)
pos += len(re.prefix)
r, width = i.step(pos)
r1, width1 = i.step(pos + width)
flag = i.context(pos)
pc = int(m.re.prefixEnd)
pc = int(re.prefixEnd)
}
for {
inst = m.op.Inst[pc]
inst = re.onepass.Inst[pc]
pc = int(inst.Out)
switch inst.Op {
default:
panic("bad inst")
case syntax.InstMatch:
m.matched = true
matched = true
if len(m.matchcap) > 0 {
m.matchcap[0] = 0
m.matchcap[1] = pos
}
return m.matched
goto Return
case syntax.InstRune:
if !inst.MatchRune(r) {
return m.matched
goto Return
}
case syntax.InstRune1:
if r != inst.Rune[0] {
return m.matched
goto Return
}
case syntax.InstRuneAny:
// Nothing
case syntax.InstRuneAnyNotNL:
if r == '\n' {
return m.matched
goto Return
}
// peek at the input rune to see which branch of the Alt to take
case syntax.InstAlt, syntax.InstAltMatch:
pc = int(onePassNext(&inst, r))
continue
case syntax.InstFail:
return m.matched
goto Return
case syntax.InstNop:
continue
case syntax.InstEmptyWidth:
if syntax.EmptyOp(inst.Arg)&^flag != 0 {
return m.matched
goto Return
}
continue
case syntax.InstCapture:
@ -424,7 +451,16 @@ func (m *machine) onepass(i input, pos, ncap int) bool {
r1, width1 = i.step(pos + width)
}
}
return m.matched
Return:
if !matched {
freeOnePassMachine(m)
return nil
}
dstCap = append(dstCap, m.matchcap...)
freeOnePassMachine(m)
return dstCap
}
// doMatch reports whether either r, b or s match the regexp.
@ -442,25 +478,22 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
dstCap = arrayNoInts[:0:0]
}
if re.onepass == notOnePass && r == nil && len(b)+len(s) < re.maxBitStateLen {
if re.onepass != nil {
return re.doOnePass(r, b, s, pos, ncap, dstCap)
}
if r == nil && len(b)+len(s) < re.maxBitStateLen {
return re.backtrack(b, s, pos, ncap, dstCap)
}
m := re.get()
i, _ := m.inputs.init(r, b, s)
if m.op != notOnePass {
if !m.onepass(i, pos, ncap) {
re.put(m)
return nil
}
} else {
m.init(ncap)
if !m.match(i, pos) {
re.put(m)
return nil
}
m.init(ncap)
if !m.match(i, pos) {
re.put(m)
return nil
}
dstCap = append(dstCap, m.matchcap...)
re.put(m)
return dstCap