go/src/cmd/compile/internal/ssa/writebarrier.go
Matthew Dempsky ad1f2c9618 cmd/compile: use CTNIL for pointer-typed OLITERALs
We used to be more aggressive about constant folding in the frontend,
handling expressions that the Go spec does not consider constant;
e.g., "(*int)(unsafe.Pointer(uintptr(200)))". However, that led to a
lot of subtle Go spec conformance issues, so we've since abandoned
that effort (CL 151320), leaving SSA to handle these cases instead.

As such, the only time we now end up with pointer-typed OLITERALs is
when "nil" is implicitly converted to a pointer-typed variable.
Instead of representing these OLITERALs with an CTINT of 0, we can
just use CTNIL.
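
For example, in "var p *int = nil", the untyped nil is implicitly
converted to *int, producing a pointer-typed OLITERAL.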

Saves a few bytes of memory and lines of code.

Change-Id: Ibc5c756b992fdc89c3bdaf4fda3aa352e8e2b101
Reviewed-on: https://go-review.googlesource.com/c/go/+/193437
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
2019-09-06 22:55:17 +00:00

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/src"
	"strings"
)

// A ZeroRegion records a range of an object which is known to be zero.
// A ZeroRegion only applies to a single memory state.
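// For example (sizes chosen for illustration): just after a call returning
// a fresh, zeroed 24-byte object p, ZeroRegion{base: p, min: 0, max: 24}
// records that bytes [0,24) of *p are zero in that memory state.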
type ZeroRegion struct {
	base *Value
	min  int64
	max  int64
}

// needwb reports whether we need a write barrier for store op v.
// v must be Store/Move/Zero.
// zeroes provides known zero information (keyed by ID of memory-type values).
func needwb(v *Value, zeroes map[ID]ZeroRegion) bool {
	t, ok := v.Aux.(*types.Type)
	if !ok {
		v.Fatalf("store aux is not a type: %s", v.LongString())
	}
	if !t.HasHeapPointer() {
		return false
	}
	if IsStackAddr(v.Args[0]) {
		return false // write on stack doesn't need write barrier
	}
	if v.Op == OpMove && IsReadOnlyGlobalAddr(v.Args[1]) && IsNewObject(v.Args[0], v.MemoryArg()) {
		// Copying data from readonly memory into a fresh object doesn't need a write barrier.
		return false
	}
	if v.Op == OpStore && IsGlobalAddr(v.Args[1]) {
		// Storing pointers to non-heap locations into zeroed memory doesn't need a write barrier.
		ptr := v.Args[0]
		var off int64
		size := v.Aux.(*types.Type).Size()
		for ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		}
		z := zeroes[v.MemoryArg().ID]
		if ptr == z.base && off >= z.min && off+size <= z.max {
			return false
		}
	}
	return true
}

// writebarrier pass inserts write barriers for store ops (Store, Move, Zero)
// when necessary (the condition above). It rewrites store ops to branches
// and runtime calls, like
//
//	if writeBarrier.enabled {
//		gcWriteBarrier(ptr, val)	// Not a regular Go call
//	} else {
//		*ptr = val
//	}
//
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
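//
// For example (a sketch of the rewritten code, not literal compiler output),
// two barriered stores in one block share a single test of the flag:
//
//	if writeBarrier.enabled {
//		gcWriteBarrier(&p.f1, v1)
//		gcWriteBarrier(&p.f2, v2)
//	} else {
//		p.f1 = v1
//		p.f2 = v2
//	}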
func writebarrier(f *Func) {
	if !f.fe.UseWriteBarrier() {
		return
	}

	var sb, sp, wbaddr, const0 *Value
	var typedmemmove, typedmemclr, gcWriteBarrier *obj.LSym
	var stores, after []*Value
	var sset *sparseSet
	var storeNumber []int32

	zeroes := f.computeZeroMap()
	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
		// first, identify all the stores that need to insert a write barrier.
		// mark them with WB ops temporarily. record presence of WB ops.
		nWBops := 0 // count of temporarily created WB ops remaining to be rewritten in the current block
		for _, v := range b.Values {
			switch v.Op {
			case OpStore, OpMove, OpZero:
				if needwb(v, zeroes) {
					switch v.Op {
					case OpStore:
						v.Op = OpStoreWB
					case OpMove:
						v.Op = OpMoveWB
					case OpZero:
						v.Op = OpZeroWB
					}
					nWBops++
				}
			}
		}
		if nWBops == 0 {
			continue
		}

		if wbaddr == nil {
			// lazily initialize global values for write barrier test and calls
			// find SB and SP values in entry block
			initpos := f.Entry.Pos
			for _, v := range f.Entry.Values {
				if v.Op == OpSB {
					sb = v
				}
				if v.Op == OpSP {
					sp = v
				}
				if sb != nil && sp != nil {
					break
				}
			}
			if sb == nil {
				sb = f.Entry.NewValue0(initpos, OpSB, f.Config.Types.Uintptr)
			}
			if sp == nil {
				sp = f.Entry.NewValue0(initpos, OpSP, f.Config.Types.Uintptr)
			}
			wbsym := f.fe.Syslook("writeBarrier")
			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
			typedmemmove = f.fe.Syslook("typedmemmove")
			typedmemclr = f.fe.Syslook("typedmemclr")
			const0 = f.ConstInt32(f.Config.Types.UInt32, 0)

			// allocate auxiliary data structures for computing store order
			sset = f.newSparseSet(f.NumValues())
			defer f.retSparseSet(sset)
			storeNumber = make([]int32, f.NumValues())
		}

		// order values in store order
		b.Values = storeOrder(b.Values, sset, storeNumber)

		firstSplit := true
	again:
		// find the start and end of the last contiguous WB store sequence.
		// a branch will be inserted there. values after it will be moved
		// to a new block.
		var last *Value
		var start, end int
		values := b.Values
	FindSeq:
		for i := len(values) - 1; i >= 0; i-- {
			w := values[i]
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				start = i
				if last == nil {
					last = w
					end = i + 1
				}
			case OpVarDef, OpVarLive, OpVarKill:
				continue
			default:
				if last == nil {
					continue
				}
				break FindSeq
			}
		}
		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
		after = append(after[:0], b.Values[end:]...)
		b.Values = b.Values[:start]

		// find the memory before the WB stores
		mem := stores[0].MemoryArg()
		pos := stores[0].Pos
		bThen := f.NewBlock(BlockPlain)
		bElse := f.NewBlock(BlockPlain)
		bEnd := f.NewBlock(b.Kind)
		bThen.Pos = pos
		bElse.Pos = pos
		bEnd.Pos = b.Pos
		b.Pos = pos

		// set up control flow for end block
		bEnd.SetControl(b.Control)
		bEnd.Likely = b.Likely
		for _, e := range b.Succs {
			bEnd.Succs = append(bEnd.Succs, e)
			e.b.Preds[e.i].b = bEnd
		}

		// set up control flow for write barrier test
		// load word, test word, avoiding partial register write from load byte.
		cfgtypes := &f.Config.Types
		flag := b.NewValue2(pos, OpLoad, cfgtypes.UInt32, wbaddr, mem)
		flag = b.NewValue2(pos, OpNeq32, cfgtypes.Bool, flag, const0)
		b.Kind = BlockIf
		b.SetControl(flag)
		b.Likely = BranchUnlikely
		b.Succs = b.Succs[:0]
		b.AddEdgeTo(bThen)
		b.AddEdgeTo(bElse)

		// TODO: For OpStoreWB and the buffered write barrier,
		// we could move the write out of the write barrier,
		// which would lead to fewer branches. We could do
		// something similar to OpZeroWB, since the runtime
		// could provide just the barrier half and then we
		// could unconditionally do an OpZero (which could
		// also generate better zeroing code). OpMoveWB is
		// trickier and would require changing how
		// cgoCheckMemmove works.
		bThen.AddEdgeTo(bEnd)
		bElse.AddEdgeTo(bEnd)

		// for each write barrier store, append write barrier version to bThen
		// and simple store version to bElse
		memThen := mem
		memElse := mem

		// If the source of a MoveWB is volatile (will be clobbered by a
		// function call), we need to copy it to a temporary location, as
		// marshaling the args of typedmemmove might clobber the value we're
		// trying to move.
		// Look for volatile source, copy it to temporary before we emit any
		// call.
		// It is unlikely to have more than one of them. Just do a linear
		// search instead of using a map.
		type volatileCopy struct {
			src *Value // address of original volatile value
			tmp *Value // address of temporary we've copied the volatile value into
		}
		var volatiles []volatileCopy
	copyLoop:
		for _, w := range stores {
			if w.Op == OpMoveWB {
				val := w.Args[1]
				if isVolatile(val) {
					for _, c := range volatiles {
						if val == c.src {
							continue copyLoop // already copied
						}
					}

					t := val.Type.Elem()
					tmp := f.fe.Auto(w.Pos, t)
					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
					siz := t.Size()
					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
					memThen.Aux = t
					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
				}
			}
		}

		for _, w := range stores {
			ptr := w.Args[0]
			pos := w.Pos

			var fn *obj.LSym
			var typ *obj.LSym
			var val *Value
			switch w.Op {
			case OpStoreWB:
				val = w.Args[1]
				nWBops--
			case OpMoveWB:
				fn = typedmemmove
				val = w.Args[1]
				typ = w.Aux.(*types.Type).Symbol()
				nWBops--
			case OpZeroWB:
				fn = typedmemclr
				typ = w.Aux.(*types.Type).Symbol()
				nWBops--
			case OpVarDef, OpVarLive, OpVarKill:
			}

			// then block: emit write barrier call
			switch w.Op {
			case OpStoreWB, OpMoveWB, OpZeroWB:
				if w.Op == OpStoreWB {
					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
				} else {
					srcval := val
					if w.Op == OpMoveWB && isVolatile(srcval) {
						for _, c := range volatiles {
							if srcval == c.src {
								srcval = c.tmp
								break
							}
						}
					}
					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
				}
				// Note that we set up a writebarrier function call.
				f.fe.SetWBPos(pos)
			case OpVarDef, OpVarLive, OpVarKill:
				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
			}

			// else block: normal store
			switch w.Op {
			case OpStoreWB:
				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
			case OpMoveWB:
				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
				memElse.Aux = w.Aux
			case OpZeroWB:
				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
				memElse.Aux = w.Aux
			case OpVarDef, OpVarLive, OpVarKill:
				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
			}
		}

		// mark volatile temps dead
		for _, c := range volatiles {
			tmpNode := c.tmp.Aux
			memThen = bThen.NewValue1A(memThen.Pos, OpVarKill, types.TypeMem, tmpNode, memThen)
		}

		// merge memory
		// Splice memory Phi into the last memory of the original sequence,
		// which may be used in subsequent blocks. Other memories in the
		// sequence must be dead after this block since there can be only
		// one memory live.
		bEnd.Values = append(bEnd.Values, last)
		last.Block = bEnd
		last.reset(OpPhi)
		last.Type = types.TypeMem
		last.AddArg(memThen)
		last.AddArg(memElse)
		for _, w := range stores {
			if w != last {
				w.resetArgs()
			}
		}
		for _, w := range stores {
			if w != last {
				f.freeValue(w)
			}
		}

		// put values after the store sequence into the end block
		bEnd.Values = append(bEnd.Values, after...)
		for _, w := range after {
			w.Block = bEnd
		}

		// Preemption is unsafe between loading the write
		// barrier-enabled flag and performing the write
		// because that would allow a GC phase transition,
		// which would invalidate the flag. Remember the
		// conditional block so liveness analysis can disable
		// safe-points. This is somewhat subtle because we're
		// splitting b bottom-up.
		if firstSplit {
			// Add b itself.
			b.Func.WBLoads = append(b.Func.WBLoads, b)
			firstSplit = false
		} else {
			// We've already split b, so we just pushed a
			// write barrier test into bEnd.
			b.Func.WBLoads = append(b.Func.WBLoads, bEnd)
		}

		// if we have more stores in this block, do this block again
		if nWBops > 0 {
			goto again
		}
	}
}

// computeZeroMap returns a map from an ID of a memory value to
// a set of locations that are known to be zeroed at that memory value.
func (f *Func) computeZeroMap() map[ID]ZeroRegion {
	// Keep track of which parts of memory are known to be zero.
	// This helps with removing write barriers for various initialization patterns.
	// This analysis is conservative. We only keep track, for each memory state, of
	// a single constant range of a single object which is known to be zero.
	zeroes := map[ID]ZeroRegion{}

	// Find new objects.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpLoad {
				continue
			}
			mem := v.MemoryArg()
			if IsNewObject(v, mem) {
				zeroes[mem.ID] = ZeroRegion{v, 0, v.Type.Elem().Size()}
			}
		}
	}

	// Find stores to those new objects.
	for {
		changed := false
		for _, b := range f.Blocks {
			// Note: iterating forwards helps convergence, as values are
			// typically (but not always!) in store order.
			for _, v := range b.Values {
				if v.Op != OpStore {
					continue
				}
				z, ok := zeroes[v.MemoryArg().ID]
				if !ok {
					continue
				}
				ptr := v.Args[0]
				var off int64
				size := v.Aux.(*types.Type).Size()
				for ptr.Op == OpOffPtr {
					off += ptr.AuxInt
					ptr = ptr.Args[0]
				}
				if ptr != z.base {
					// Different base object - we don't know anything.
					// We could even be writing to the base object we know
					// about, but through an aliased but offset pointer.
					// So we have to throw all the zero information we have away.
					continue
				}
				if off < z.min || off+size > z.max {
					// Writing, at least partially, outside the known zeroes.
					// We could salvage some zero information, but probably
					// not worth it.
					continue
				}
				// We now know we're storing to a zeroed area.
				// We need to make a smaller zero range for the result of this store.
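				// For example, with an 8-byte store into a known-zero [0,24)
				// region: a store at off == z.min leaves [8,24), and a store
				// ending at off+size == z.max leaves [0,16).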
				if off == z.min {
					z.min += size
				} else if off+size == z.max {
					z.max -= size
				} else {
					// The store splits the known zero range in two.
					// Keep track of the upper one, as we tend to initialize
					// things in increasing memory order.
					// TODO: keep track of larger one instead?
					z.min = off + size
				}
				// Save updated zero range.
				if zeroes[v.ID] != z {
					zeroes[v.ID] = z
					changed = true
				}
			}
		}
		if !changed {
			break
		}
	}
	return zeroes
}

// wbcall emits a write barrier runtime call in b and returns the new memory.
func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
	config := b.Func.Config

	// put arguments on stack
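	// The arguments marshaled below are (typ, dst, src) for typedmemmove and
	// (typ, ptr) for typedmemclr; each is stored at the next offset rounded
	// up to that argument's alignment.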
	off := config.ctxt.FixedFrameSize()

	if typ != nil { // for typedmemmove
		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
		off = round(off, taddr.Type.Alignment())
		arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
		off += taddr.Type.Size()
	}

	off = round(off, ptr.Type.Alignment())
	arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
	mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
	off += ptr.Type.Size()

	if val != nil {
		off = round(off, val.Type.Alignment())
		arg = b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
		mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
		off += val.Type.Size()
	}
	off = round(off, config.PtrSize)

	// issue call
	mem = b.NewValue1A(pos, OpStaticCall, types.TypeMem, fn, mem)
	mem.AuxInt = off - config.ctxt.FixedFrameSize()
	return mem
}

// round rounds o up to a multiple of r; r must be a power of 2.
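// For example, round(13, 8) == 16 and round(16, 8) == 16.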
func round(o int64, r int64) int64 {
	return (o + r - 1) &^ (r - 1)
}

// IsStackAddr reports whether v is known to be an address of a stack slot.
func IsStackAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr:
		return true
	}
	return false
}

// IsGlobalAddr reports whether v is known to be an address of a global (or nil).
func IsGlobalAddr(v *Value) bool {
	if v.Op == OpAddr && v.Args[0].Op == OpSB {
		return true // address of a global
	}
	if v.Op == OpConstNil {
		return true
	}
	return false
}

// IsReadOnlyGlobalAddr reports whether v is known to be an address of a read-only global.
func IsReadOnlyGlobalAddr(v *Value) bool {
	if !IsGlobalAddr(v) {
		return false
	}
	if v.Op == OpConstNil {
		// Nil pointers are read only. See issue 33438.
		return true
	}
	// See TODO in OpAddr case in IsSanitizerSafeAddr below.
	return strings.HasPrefix(v.Aux.(*obj.LSym).Name, `""..stmp_`)
}

// IsNewObject reports whether v is a pointer to a freshly allocated & zeroed object at memory state mem.
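// Concretely, it matches a Load of runtime.newobject's return value slot
// (a sketch of the expected SSA shape; the exact offset is target-dependent):
//
//	mem = StaticCall {runtime.newobject} ...
//	v   = Load (OffPtr [off] SP) mem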
func IsNewObject(v *Value, mem *Value) bool {
	if v.Op != OpLoad {
		return false
	}
	if v.MemoryArg() != mem {
		return false
	}
	if mem.Op != OpStaticCall {
		return false
	}
	if !isSameSym(mem.Aux, "runtime.newobject") {
		return false
	}
	if v.Args[0].Op != OpOffPtr {
		return false
	}
	if v.Args[0].Args[0].Op != OpSP {
		return false
	}
	c := v.Block.Func.Config
	if v.Args[0].AuxInt != c.ctxt.FixedFrameSize()+c.RegSize { // offset of return value
		return false
	}
	return true
}

// IsSanitizerSafeAddr reports whether v is known to be an address
// that doesn't need instrumentation.
func IsSanitizerSafeAddr(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	switch v.Op {
	case OpSP, OpLocalAddr:
		// Stack addresses are always safe.
		return true
	case OpITab, OpStringPtr, OpGetClosurePtr:
		// Itabs, string data, and closure fields are
		// read-only once initialized.
		return true
	case OpAddr:
		sym := v.Aux.(*obj.LSym)
		// TODO(mdempsky): Find a cleaner way to
		// detect this. It would be nice if we could
		// test sym.Type==objabi.SRODATA, but we don't
		// initialize sym.Type until after function
		// compilation.
		if strings.HasPrefix(sym.Name, `""..stmp_`) {
			return true
		}
	}
	return false
}

// isVolatile reports whether v is a pointer to the argument region on the
// stack, which will be clobbered by a function call.
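// For example, an address of the form (OffPtr [8] SP) lies in the region
// used to marshal call arguments, so a call such as typedmemmove may
// overwrite it.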
func isVolatile(v *Value) bool {
	for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
		v = v.Args[0]
	}
	return v.Op == OpSP
}