cmd/compile: better write barrier removal when initializing new objects
When initializing a new object, we're often writing
 1) to a location that doesn't currently hold a pointer to a heap object
 2) a pointer that doesn't point to a heap object
When both of those conditions are true, we can avoid the write barrier.

This CL detects case 1 by looking for writes to known-zeroed locations.
The results of runtime.newobject are zeroed, and we perform a simple
tracking of which parts of that object are written, so we can determine
what part remains zero at each write.

This CL detects case 2 by looking for addresses of globals (including
the type and itab pointers used in interfaces) and for nil pointers.

Makes cmd/go 0.3% smaller. Some particular cases, like the slice
literal in #29573, can get much smaller.

TODO: we can remove actual zero writes also with this mechanism.

Update #29573

Change-Id: Ie74a3533775ea88da0495ba02458391e5db26cb9
Reviewed-on: https://go-review.googlesource.com/c/go/+/156363
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent 08751259b7
commit ca36af215f

3 changed files with 145 additions and 13 deletions
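To illustrate the two conditions, here is a minimal sketch of the kind of
initialization this CL targets. The names T, g, and New are illustrative,
not from the CL; field offsets assume 8-byte words on a 64-bit target; the
barrier comments mirror the new f26/f27 tests at the end of this diff.

package main

var g int // a global; &g can never point into the heap

type T struct {
	a, b, c int  // offsets 0, 8, 16
	d, e, f *int // offsets 24, 32, 40
}

// New heap-allocates via runtime.newobject (which returns zeroed
// memory) and then stores each field of the composite literal.
func New(p *int) *T {
	return &T{
		a: 1, b: 2, c: 3, // non-pointer data: never needs a barrier
		d: &g,  // global address into a known-zero slot: barrier removed
		e: nil, // nil pointer into a known-zero slot: barrier removed
		f: p,   // p may point to a heap object: barrier kept
	}
}

func main() {
	_ = New(new(int)) // the argument may be heap-allocated, so the store to f keeps its barrier
}

Building with the compiler's write-barrier diagnostics enabled (go build
-gcflags='-d=wb') should report only the store of p; the errorcheck test
at the bottom of this diff asserts exactly that with its ERROR annotations.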
src/cmd/compile/internal/gc/sinit.go

@@ -729,6 +729,7 @@ func fixedlit(ctxt initContext, kind initKind, n *Node, var_ *Node, init *Nodes)
 			if r.Sym.IsBlank() {
 				return nblank, r.Left
 			}
+			setlineno(r)
 			return nodSym(ODOT, var_, r.Sym), r.Left
 		}
 	default:
@@ -756,7 +757,7 @@ func fixedlit(ctxt initContext, kind initKind, n *Node, var_ *Node, init *Nodes)
 		}
 
 		// build list of assignments: var[index] = expr
-		setlineno(value)
+		setlineno(a)
 		a = nod(OAS, a, value)
 		a = typecheck(a, ctxStmt)
 		switch kind {
src/cmd/compile/internal/ssa/writebarrier.go

@@ -11,9 +11,18 @@ import (
 	"strings"
 )
 
+// A ZeroRegion records a range of an object which is known to be zero.
+// A ZeroRegion only applies to a single memory state.
+type ZeroRegion struct {
+	base *Value
+	min  int64
+	max  int64
+}
+
 // needwb reports whether we need write barrier for store op v.
 // v must be Store/Move/Zero.
-func needwb(v *Value) bool {
+// zeroes provides known zero information (keyed by ID of memory-type values).
+func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
 	t, ok := v.Aux.(*types.Type)
 	if !ok {
 		v.Fatalf("store aux is not a type: %s", v.LongString())
@@ -24,14 +33,24 @@ func needwb(v *Value) bool {
 	if IsStackAddr(v.Args[0]) {
 		return false // write on stack doesn't need write barrier
 	}
-	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) && IsNewObject(v.Args[0], v.MemoryArg()) {
-		// Storing pointers to non-heap locations into a fresh object doesn't need a write barrier.
-		return false
-	}
 	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) && IsNewObject(v.Args[0], v.MemoryArg()) {
 		// Copying data from readonly memory into a fresh object doesn't need a write barrier.
 		return false
 	}
+	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
+		// Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier.
+		ptr := v.Args[0]
+		var off int64
+		size := v.Aux.(*types.Type).Size()
+		for ptr.Op == OpOffPtr {
+			off += ptr.AuxInt
+			ptr = ptr.Args[0]
+		}
+		z := zeroes[v.MemoryArg().ID]
+		if ptr == z.base && off >= z.min && off+size <= z.max {
+			return false
+		}
+	}
 	return true
 }
 
@@ -58,6 +77,7 @@ func writebarrier(f *Func) {
 	var sset *sparseSet
 	var storeNumber []int32
 
+	zeroes := f.computeZeroMap()
 	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
 		// first, identify all the stores that need to insert a write barrier.
 		// mark them with WB ops temporarily. record presence of WB ops.
@@ -65,7 +85,7 @@ func writebarrier(f *Func) {
 		for _, v := range b.Values {
 			switch v.Op {
 			case OpStore, OpMove, OpZero:
-				if needwb(v) {
+				if needwb(v, zeroes) {
 					switch v.Op {
 					case OpStore:
 						v.Op = OpStoreWB
@@ -301,6 +321,87 @@ func writebarrier(f *Func) {
 	}
 }
 
+// computeZeroMap returns a map from an ID of a memory value to
+// a set of locations that are known to be zeroed at that memory value.
+func (f *Func) computeZeroMap() map[ID]ZeroRegion {
+	// Keep track of which parts of memory are known to be zero.
+	// This helps with removing write barriers for various initialization patterns.
+	// This analysis is conservative. We only keep track, for each memory state, of
+	// a single constant range of a single object which is known to be zero.
+	zeroes := map[ID]ZeroRegion{}
+	// Find new objects.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Op != OpLoad {
+				continue
+			}
+			mem := v.MemoryArg()
+			if IsNewObject(v, mem) {
+				zeroes[mem.ID] = ZeroRegion{v, 0, v.Type.Elem().Size()}
+			}
+		}
+	}
+	// Find stores to those new objects.
+	for {
+		changed := false
+		for _, b := range f.Blocks {
+			// Note: iterating forwards helps convergence, as values are
+			// typically (but not always!) in store order.
+			for _, v := range b.Values {
+				if v.Op != OpStore {
+					continue
+				}
+				z, ok := zeroes[v.MemoryArg().ID]
+				if !ok {
+					continue
+				}
+				ptr := v.Args[0]
+				var off int64
+				size := v.Aux.(*types.Type).Size()
+				for ptr.Op == OpOffPtr {
+					off += ptr.AuxInt
+					ptr = ptr.Args[0]
+				}
+				if ptr != z.base {
+					// Different base object - we don't know anything.
+					// We could even be writing to the base object we know
+					// about, but through an aliased but offset pointer.
+					// So we have to throw all the zero information we have away.
+					continue
+				}
+				if off < z.min || off+size > z.max {
+					// Writing, at least partially, outside the known zeroes.
+					// We could salvage some zero information, but probably
+					// not worth it.
+					continue
+				}
+				// We now know we're storing to a zeroed area.
+				// We need to make a smaller zero range for the result of this store.
+				if off == z.min {
+					z.min += size
+				} else if off+size == z.max {
+					z.max -= size
+				} else {
+					// The store splits the known zero range in two.
+					// Keep track of the upper one, as we tend to initialize
+					// things in increasing memory order.
+					// TODO: keep track of larger one instead?
+					z.min = off + size
+				}
+				// Save updated zero range.
+				if zeroes[v.ID] != z {
+					zeroes[v.ID] = z
+					changed = true
+				}
+			}
+		}
+		if !changed {
+			break
+		}
+	}
+	return zeroes
+}
+
 // wbcall emits write barrier runtime call in b, returns memory.
 // if valIsVolatile, it moves val into temp space before making the call.
 func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value, valIsVolatile bool) *Value {
@@ -373,9 +474,15 @@ func IsStackAddr(v *Value) bool {
 	return false
 }
 
-// IsGlobalAddr reports whether v is known to be an address of a global.
+// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
 func IsGlobalAddr(v *Value) bool {
-	return v.Op == OpAddr && v.Args[0].Op == OpSB
+	if v.Op == OpAddr && v.Args[0].Op == OpSB {
+		return true // address of a global
+	}
+	if v.Op == OpConst64 || v.Op == OpConst32 {
+		return true // nil, the only possible pointer constant
+	}
+	return false
 }
 
 // IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
@@ -388,10 +495,6 @@ func IsReadOnlyGlobalAddr(v *Value) bool {
 }
 
 // IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
-// TODO: Be more precise. We really want "IsNilPointer" for the particular field in question.
-// Right now, we can only detect a new object before any writes have been done to it.
-// We could ignore non-pointer writes, writes to offsets which
-// are known not to overlap the write in question, etc.
 func IsNewObject(v *Value, mem *Value) bool {
 	if v.Op != OpLoad {
 		return false
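The heart of computeZeroMap is the interval update applied at each store.
The following standalone sketch shows the same rule plus a worked trace for
a fresh 48-byte object (zeroRegion and shrink are hypothetical names for
this illustration; the compiler's version also tracks the object's base
pointer and keys the result by SSA memory state):

package main

import "fmt"

// zeroRegion mirrors the min/max fields of ssa.ZeroRegion: bytes in
// [min, max) of the object are known to be zero. The base *Value that
// the compiler carries is omitted here.
type zeroRegion struct {
	min, max int64
}

// shrink applies computeZeroMap's update for a store of size bytes at
// offset off. ok == false means the zero information is dropped, which
// corresponds to the compiler recording nothing for the new memory state.
func shrink(z zeroRegion, off, size int64) (zeroRegion, bool) {
	if off < z.min || off+size > z.max {
		// Writing, at least partially, outside the known zeroes.
		return zeroRegion{}, false
	}
	switch {
	case off == z.min:
		z.min += size // store chops zeroes off the front
	case off+size == z.max:
		z.max -= size // store chops zeroes off the back
	default:
		// The store splits the range in two; keep the upper part,
		// since objects tend to be initialized in increasing order.
		z.min = off + size
	}
	return z, true
}

func main() {
	z := zeroRegion{0, 48}  // fresh 48-byte object: all zero
	z, _ = shrink(z, 0, 24) // fields a, b, c written: [24, 48) still zero
	z, _ = shrink(z, 24, 8) // field d written: [32, 48) still zero
	fmt.Println(z)          // {32 48}: stores to e and f still see known zeroes
}

Keeping only one contiguous range per memory state is what keeps this
analysis cheap; the TODO in the code notes that keeping the larger half
on a split might be a better heuristic.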
test/writebarrier.go

@@ -261,3 +261,31 @@ func f24() **int {
 func f25() []string {
 	return []string{"abc", "def", "ghi"} // no write barrier here
 }
+
+type T26 struct {
+	a, b, c int
+	d, e, f *int
+}
+
+var g26 int
+
+func f26(p *int) *T26 { // see issue 29573
+	return &T26{
+		a: 5,
+		b: 6,
+		c: 7,
+		d: &g26, // no write barrier: global ptr
+		e: nil,  // no write barrier: nil ptr
+		f: p,    // ERROR "write barrier"
+	}
+}
+
+func f27(p *int) []interface{} {
+	return []interface{}{
+		nil,         // no write barrier: zeroed memory, nil ptr
+		(*T26)(nil), // no write barrier: zeroed memory, type ptr & nil ptr
+		&g26,        // no write barrier: zeroed memory, type ptr & global ptr
+		7,           // no write barrier: zeroed memory, type ptr & global ptr
+		p,           // ERROR "write barrier"
+	}
+}
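These additions extend the compiler's write-barrier errorcheck test,
presumably compiled with the same -d=wb diagnostic mentioned above: each
// ERROR "write barrier" annotation asserts that a barrier survives at that
store, while every unannotated pointer store asserts that the barrier was
removed. If the test harness works as it traditionally has, the file can be
run on its own from $GOROOT/test with: go run run.go -- writebarrier.go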