cmd/internal/gc: emit write barriers at lower level

This is primarily preparation for inlining write barriers, not an
optimization by itself, but it still helps somewhat.

name                                       old                     new          delta
BenchmarkBinaryTree17              18.2s × (0.99,1.01)     17.9s × (0.99,1.01)  -1.57%
BenchmarkFannkuch11                4.44s × (1.00,1.00)     4.42s × (1.00,1.00)  -0.40%
BenchmarkFmtFprintfEmpty           119ns × (0.95,1.02)     118ns × (0.96,1.02)  ~
BenchmarkFmtFprintfString          501ns × (0.99,1.02)     486ns × (0.99,1.01)  -2.89%
BenchmarkFmtFprintfInt             474ns × (0.99,1.00)     457ns × (0.99,1.01)  -3.59%
BenchmarkFmtFprintfIntInt          792ns × (1.00,1.00)     768ns × (1.00,1.01)  -3.03%
BenchmarkFmtFprintfPrefixedInt     574ns × (1.00,1.01)     584ns × (0.99,1.03)  +1.83%
BenchmarkFmtFprintfFloat           749ns × (1.00,1.00)     739ns × (0.99,1.00)  -1.34%
BenchmarkFmtManyArgs              2.94µs × (1.00,1.01)    2.77µs × (1.00,1.00)  -5.76%
BenchmarkGobDecode                39.5ms × (0.99,1.01)    39.3ms × (0.99,1.01)  ~
BenchmarkGobEncode                39.4ms × (1.00,1.01)    39.4ms × (0.99,1.00)  ~
BenchmarkGzip                      658ms × (1.00,1.01)     661ms × (0.99,1.01)  ~
BenchmarkGunzip                    142ms × (1.00,1.00)     142ms × (1.00,1.00)  +0.22%
BenchmarkHTTPClientServer          134µs × (0.99,1.01)     133µs × (0.98,1.01)  ~
BenchmarkJSONEncode               57.1ms × (0.99,1.01)    56.5ms × (0.99,1.01)  ~
BenchmarkJSONDecode                141ms × (1.00,1.00)     143ms × (1.00,1.00)  +1.09%
BenchmarkMandelbrot200            6.01ms × (1.00,1.00)    6.01ms × (1.00,1.00)  ~
BenchmarkGoParse                  10.1ms × (0.91,1.09)     9.6ms × (0.94,1.07)  ~
BenchmarkRegexpMatchEasy0_32       207ns × (1.00,1.01)     210ns × (1.00,1.00)  +1.45%
BenchmarkRegexpMatchEasy0_1K       592ns × (0.99,1.00)     596ns × (0.99,1.01)  +0.68%
BenchmarkRegexpMatchEasy1_32       184ns × (0.99,1.01)     184ns × (0.99,1.01)  ~
BenchmarkRegexpMatchEasy1_1K      1.01µs × (1.00,1.00)    1.01µs × (0.99,1.01)  ~
BenchmarkRegexpMatchMedium_32      327ns × (0.99,1.00)     327ns × (1.00,1.01)  ~
BenchmarkRegexpMatchMedium_1K     92.5µs × (1.00,1.00)    93.0µs × (1.00,1.02)  +0.48%
BenchmarkRegexpMatchHard_32       4.79µs × (0.95,1.00)    4.76µs × (0.95,1.01)  ~
BenchmarkRegexpMatchHard_1K        136µs × (1.00,1.00)     136µs × (1.00,1.01)  ~
BenchmarkRevcomp                   900ms × (0.99,1.01)     892ms × (1.00,1.01)  ~
BenchmarkTemplate                  170ms × (0.99,1.01)     175ms × (0.99,1.00)  +2.95%
BenchmarkTimeParse                 645ns × (1.00,1.00)     638ns × (1.00,1.00)  -1.16%
BenchmarkTimeFormat                740ns × (1.00,1.00)     772ns × (1.00,1.00)  +4.39%

Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Russ Cox 2015-04-17 00:25:10 -04:00
parent 673bd18805
commit 0ad4f8b1f7
10 changed files with 272 additions and 97 deletions

View file

@ -403,7 +403,7 @@ func Cgen_eface(n *Node, res *Node) {
* n.Left is x
* n.Type is T
*/
func cgen_dottype(n *Node, res, resok *Node) {
func cgen_dottype(n *Node, res, resok *Node, wb bool) {
if Debug_typeassert > 0 {
Warn("type assertion inlined")
}
@ -441,16 +441,17 @@ func cgen_dottype(n *Node, res, resok *Node) {
Cgen(typename(n.Type), &r2)
Thearch.Gins(Thearch.Optoas(OCMP, byteptr), &r1, &r2)
p := Gbranch(Thearch.Optoas(ONE, byteptr), nil, -1)
Regfree(&r2) // not needed for success path; reclaimed on one failure path
iface.Xoffset += int64(Widthptr)
Cgen(&iface, &r1)
Regfree(&iface)
if resok == nil {
r1.Type = res.Type
Cgen(&r1, res)
cgen_wb(&r1, res, wb)
q := Gbranch(obj.AJMP, nil, 0)
Patch(p, Pc)
Regrealloc(&r2) // reclaim from above, for this failure path
fn := syslook("panicdottype", 0)
dowidth(fn.Type)
call := Nod(OCALLFUNC, fn, nil)
@ -467,10 +468,9 @@ func cgen_dottype(n *Node, res, resok *Node) {
// This half is handling the res, resok = x.(T) case,
// which is called from gen, not cgen, and is consequently fussier
// about blank assignments. We have to avoid calling cgen for those.
Regfree(&r2)
r1.Type = res.Type
if !isblank(res) {
Cgen(&r1, res)
cgen_wb(&r1, res, wb)
}
Regfree(&r1)
if !isblank(resok) {
@ -979,8 +979,11 @@ func gen(n *Node) {
}
Cgen_as(n.Left, n.Right)
case OASWB:
Cgen_as_wb(n.Left, n.Right, true)
case OAS2DOTTYPE:
cgen_dottype(n.Rlist.N, n.List.N, n.List.Next.N)
cgen_dottype(n.Rlist.N, n.List.N, n.List.Next.N, false)
case OCALLMETH:
cgen_callmeth(n, 0)
@ -1023,10 +1026,18 @@ ret:
lineno = lno
}
func Cgen_as(nl *Node, nr *Node) {
func Cgen_as(nl, nr *Node) {
Cgen_as_wb(nl, nr, false)
}
func Cgen_as_wb(nl, nr *Node, wb bool) {
if Debug['g'] != 0 {
Dump("cgen_as", nl)
Dump("cgen_as = ", nr)
op := "cgen_as"
if wb {
op = "cgen_as_wb"
}
Dump(op, nl)
Dump(op+" = ", nr)
}
for nr != nil && nr.Op == OCONVNOP {
@ -1065,7 +1076,7 @@ func Cgen_as(nl *Node, nr *Node) {
return
}
Cgen(nr, nl)
cgen_wb(nr, nl, wb)
}
func cgen_callmeth(n *Node, proc int) {
@ -1126,31 +1137,40 @@ func checklabels() {
// Slices, strings and interfaces are supported. Small structs or arrays with
// elements of basic type are also supported.
// nr is nil when assigning a zero value.
func Componentgen(nr *Node, nl *Node) bool {
func Componentgen(nr, nl *Node) bool {
return componentgen_wb(nr, nl, false)
}
// componentgen_wb is like Componentgen, but if wb is true it emits write barriers for pointer updates.
func componentgen_wb(nr, nl *Node, wb bool) bool {
// Don't generate any code for complete copy of a variable into itself.
// It's useless, and the VARDEF will incorrectly mark the old value as dead.
// (This check assumes that the arguments passed to componentgen did not
// themselves come from Igen, or else we could have Op==ONAME but
// with a Type and Xoffset describing an individual field, not the entire
// variable.)
if nl.Op == ONAME && nl == nr {
return true
}
// Count number of moves required to move components.
// If using write barrier, can only emit one pointer.
// TODO(rsc): Allow more pointers, for reflect.Value.
const maxMoves = 8
n := 0
numPtr := 0
visitComponents(nl.Type, 0, func(t *Type, offset int64) bool {
n++
return n <= maxMoves
if int(Simtype[t.Etype]) == Tptr && t != itable {
numPtr++
}
return n <= maxMoves && (!wb || numPtr <= 1)
})
if n > maxMoves {
if n > maxMoves || wb && numPtr > 1 {
return false
}
isConstString := Isconst(nr, CTSTR)
nodl := *nl
if !cadable(nl) {
if nr != nil && !cadable(nr) && !isConstString {
return false
}
Igen(nl, &nodl, nil)
defer Regfree(&nodl)
}
lbase := nodl.Xoffset
// Must call emitVardef on every path out of this function,
// but only after evaluating rhs.
// Must call emitVardef after evaluating rhs but before writing to lhs.
emitVardef := func() {
// Emit vardef if needed.
if nl.Op == ONAME {
@ -1161,6 +1181,26 @@ func Componentgen(nr *Node, nl *Node) bool {
}
}
isConstString := Isconst(nr, CTSTR)
if !cadable(nl) && nr != nil && !cadable(nr) && !isConstString {
return false
}
var nodl Node
if cadable(nl) {
nodl = *nl
} else {
if nr != nil && !cadable(nr) && !isConstString {
return false
}
if nr == nil || isConstString || nl.Ullman >= nr.Ullman {
Igen(nl, &nodl, nil)
defer Regfree(&nodl)
}
}
lbase := nodl.Xoffset
// Special case: zeroing.
var nodr Node
if nr == nil {
@ -1218,23 +1258,34 @@ func Componentgen(nr *Node, nl *Node) bool {
// General case: copy nl = nr.
nodr = *nr
if !cadable(nr) {
if nr.Ullman >= UINF && nodl.Op == OINDREG {
Fatal("miscompile")
}
Igen(nr, &nodr, nil)
defer Regfree(&nodr)
}
rbase := nodr.Xoffset
// Don't generate any code for complete copy of a variable into itself.
// It's useless, and the VARDEF will incorrectly mark the old value as dead.
// (This check assumes that the arguments passed to componentgen did not
// themselves come from Igen, or else we could have Op==ONAME but
// with a Type and Xoffset describing an individual field, not the entire
// variable.)
if nl.Op == ONAME && nr.Op == ONAME && nl == nr {
return true
if nodl.Op == 0 {
Igen(nl, &nodl, nil)
defer Regfree(&nodl)
lbase = nodl.Xoffset
}
emitVardef()
var (
ptrType *Type
ptrOffset int64
)
visitComponents(nl.Type, 0, func(t *Type, offset int64) bool {
if wb && int(Simtype[t.Etype]) == Tptr && t != itable {
if ptrType != nil {
Fatal("componentgen_wb %v", Tconv(nl.Type, 0))
}
ptrType = t
ptrOffset = offset
return true
}
nodl.Type = t
nodl.Xoffset = lbase + offset
nodr.Type = t
@ -1242,6 +1293,13 @@ func Componentgen(nr *Node, nl *Node) bool {
Thearch.Gmove(&nodr, &nodl)
return true
})
if ptrType != nil {
nodl.Type = ptrType
nodl.Xoffset = lbase + ptrOffset
nodr.Type = ptrType
nodr.Xoffset = rbase + ptrOffset
cgen_wbptr(&nodr, &nodl)
}
return true
}
@ -1283,7 +1341,7 @@ func visitComponents(t *Type, startOffset int64, f func(elem *Type, elemOffset i
f(Types[TFLOAT64], startOffset+8)
case TINTER:
return f(Ptrto(Types[TUINT8]), startOffset) &&
return f(itable, startOffset) &&
f(Ptrto(Types[TUINT8]), startOffset+int64(Widthptr))
return true