go/src/cmd/compile/internal/gc/pgen.go
Josh Bleecher Snyder da15fe6870 cmd/internal/obj: rework gclocals handling
The compiler handled gcargs and gclocals LSyms unusually.
It generated placeholder symbols (makefuncdatasym),
filled them in, and then renamed them for content-addressability.
This is an important binary size optimization;
the same locals information occurs over and over.

This CL continues to treat these LSyms unusually,
but in a slightly more explicit way,
and importantly for concurrent compilation,
in a way that does not require concurrent
modification of Ctxt.Hash.

Instead of creating gcargs and gclocals in the usual way,
by creating a types.Sym and then an obj.LSym,
we add them directly to obj.FuncInfo,
initialize them in obj.InitTextSym,
and deduplicate and add them to ctxt.Data at the end.
Then the backend's job is simply to fill them in
and rename them appropriately.
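
As a rough illustration of the content-addressability idea (not the actual obj package API), here is a minimal, runnable sketch in which identical locals data collapses to a single symbol named after its content hash; lsym and dedupe are hypothetical stand-ins for obj.LSym and the compiler's rename/dedup step.

package main

import (
	"crypto/md5"
	"fmt"
)

// lsym is a hypothetical stand-in for obj.LSym: a named symbol with raw data.
type lsym struct {
	Name string
	P    []byte // raw contents, e.g. a locals pointer bitmap
}

// dedupe names a symbol after a hash of its contents and returns the
// canonical symbol for that content, reusing an earlier one when possible.
// This mimics the rename-for-content-addressability step; it is not the
// compiler's real code.
func dedupe(s *lsym, seen map[string]*lsym) *lsym {
	name := fmt.Sprintf("gclocals·%x", md5.Sum(s.P))
	if canon, ok := seen[name]; ok {
		return canon // identical contents: keep only one symbol
	}
	s.Name = name
	seen[name] = s
	return s
}

func main() {
	seen := make(map[string]*lsym)
	a := dedupe(&lsym{P: []byte{0x02, 0x01}}, seen)
	b := dedupe(&lsym{P: []byte{0x02, 0x01}}, seen) // same bitmap as a
	c := dedupe(&lsym{P: []byte{0x03}}, seen)
	fmt.Println(a == b, a.Name != c.Name) // true true
}

Because the name is derived purely from the contents, functions with the same locals information end up sharing one symbol, which is where the binary size win comes from.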

Updates #15756

name       old alloc/op      new alloc/op      delta
Template        38.8MB ± 0%       38.7MB ± 0%  -0.22%  (p=0.016 n=5+5)
Unicode         29.8MB ± 0%       29.8MB ± 0%    ~     (p=0.690 n=5+5)
GoTypes          113MB ± 0%        113MB ± 0%  -0.24%  (p=0.008 n=5+5)
SSA             1.25GB ± 0%       1.24GB ± 0%  -0.39%  (p=0.008 n=5+5)
Flate           25.3MB ± 0%       25.2MB ± 0%  -0.43%  (p=0.008 n=5+5)
GoParser        31.7MB ± 0%       31.7MB ± 0%  -0.22%  (p=0.008 n=5+5)
Reflect         78.2MB ± 0%       77.6MB ± 0%  -0.80%  (p=0.008 n=5+5)
Tar             26.6MB ± 0%       26.3MB ± 0%  -0.85%  (p=0.008 n=5+5)
XML             42.4MB ± 0%       41.9MB ± 0%  -1.04%  (p=0.008 n=5+5)

name       old allocs/op     new allocs/op     delta
Template          378k ± 0%         377k ± 1%    ~     (p=0.151 n=5+5)
Unicode           321k ± 1%         321k ± 0%    ~     (p=0.841 n=5+5)
GoTypes          1.14M ± 0%        1.14M ± 0%  -0.47%  (p=0.016 n=5+5)
SSA              9.71M ± 0%        9.67M ± 0%  -0.33%  (p=0.008 n=5+5)
Flate             233k ± 1%         232k ± 1%    ~     (p=0.151 n=5+5)
GoParser          316k ± 0%         315k ± 0%  -0.49%  (p=0.016 n=5+5)
Reflect           979k ± 0%         972k ± 0%  -0.75%  (p=0.008 n=5+5)
Tar               250k ± 0%         247k ± 1%  -0.92%  (p=0.008 n=5+5)
XML               392k ± 1%         389k ± 0%  -0.67%  (p=0.008 n=5+5)

Change-Id: Idc36186ca9d2f8214b5f7720bbc27b6bb22fdc48
Reviewed-on: https://go-review.googlesource.com/40697
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-18 02:13:32 +00:00

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gc

import (
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/dwarf"
	"cmd/internal/obj"
	"cmd/internal/src"
	"cmd/internal/sys"
	"fmt"
	"sort"
)

// "Portable" code generation.
// TODO(mdempsky): Update to reference OpVar{Def,Kill,Live} instead
// and move to plive.go.
// VARDEF is an annotation for the liveness analysis, marking a place
// where a complete initialization (definition) of a variable begins.
// Since the liveness analysis can see initialization of single-word
// variables quite easily, gvardef is usually only called for multi-word
// or 'fat' variables, those satisfying isfat(n->type).
// However, gvardef is also called when a non-fat variable is initialized
// via a block move; the only time this happens is when you have
// return f()
// for a function with multiple return values exactly matching the return
// types of the current function.
//
// A 'VARDEF x' annotation in the instruction stream tells the liveness
// analysis to behave as though the variable x is being initialized at that
// point in the instruction stream. The VARDEF must appear before the
// actual (multi-instruction) initialization, and it must also appear after
// any uses of the previous value, if any. For example, if compiling:
//
// x = x[1:]
//
// it is important to generate code like:
//
// base, len, cap = pieces of x[1:]
// VARDEF x
// x = {base, len, cap}
//
// If instead the generated code looked like:
//
// VARDEF x
// base, len, cap = pieces of x[1:]
// x = {base, len, cap}
//
// then the liveness analysis would decide the previous value of x was
// unnecessary even though it is about to be used by the x[1:] computation.
// Similarly, if the generated code looked like:
//
// base, len, cap = pieces of x[1:]
// x = {base, len, cap}
// VARDEF x
//
// then the liveness analysis will not preserve the new value of x, because
// the VARDEF appears to have "overwritten" it.
//
// VARDEF is a bit of a kludge to work around the fact that the instruction
// stream is working on single-word values but the liveness analysis
// wants to work on individual variables, which might be multi-word
// aggregates. It might make sense at some point to look into letting
// the liveness analysis work on single-word values as well, although
// there are complications around interface values, slices, and strings,
// all of which cannot be treated as individual words.
//
// VARKILL is the opposite of VARDEF: it marks a value as no longer needed,
// even if its address has been taken. That is, a VARKILL annotation asserts
// that its argument is certainly dead, for use when the liveness analysis
// would not otherwise be able to deduce that fact.

func emitptrargsmap() {
	if Curfn.Func.Nname.Sym.Name == "_" {
		return
	}
	sym := lookup(fmt.Sprintf("%s.args_stackmap", Curfn.Func.Nname.Sym.Name))
	lsym := Linksym(sym)

	nptr := int(Curfn.Type.ArgWidth() / int64(Widthptr))
	bv := bvalloc(int32(nptr) * 2)
	nbitmap := 1
	if Curfn.Type.Results().NumFields() > 0 {
		nbitmap = 2
	}
	off := duint32LSym(lsym, 0, uint32(nbitmap))
	off = duint32LSym(lsym, off, uint32(bv.n))
	var xoffset int64
	if Curfn.IsMethod() {
		xoffset = 0
		onebitwalktype1(Curfn.Type.Recvs(), &xoffset, bv)
	}

	if Curfn.Type.Params().NumFields() > 0 {
		xoffset = 0
		onebitwalktype1(Curfn.Type.Params(), &xoffset, bv)
	}

	off = dbvecLSym(lsym, off, bv)
	if Curfn.Type.Results().NumFields() > 0 {
		xoffset = 0
		onebitwalktype1(Curfn.Type.Results(), &xoffset, bv)
		off = dbvecLSym(lsym, off, bv)
	}

	ggloblLSym(lsym, int32(off), obj.RODATA|obj.LOCAL)
}

// cmpstackvarlt reports whether the stack variable a sorts before b.
//
// Sort the list of stack variables: autos after anything else;
// within autos, unused after used; within used, things with
// pointers first, zeroed things first, and then decreasing size.
// Because autos are laid out in decreasing addresses
// on the stack, pointers first, zeroed things first and decreasing size
// really means, in memory, things with pointers needing zeroing at
// the top of the stack and increasing in size.
// Non-autos sort on offset.
func cmpstackvarlt(a, b *Node) bool {
	if (a.Class == PAUTO) != (b.Class == PAUTO) {
		return b.Class == PAUTO
	}

	if a.Class != PAUTO {
		return a.Xoffset < b.Xoffset
	}

	if a.Used() != b.Used() {
		return a.Used()
	}

	ap := types.Haspointers(a.Type)
	bp := types.Haspointers(b.Type)
	if ap != bp {
		return ap
	}

	ap = a.Name.Needzero()
	bp = b.Name.Needzero()
	if ap != bp {
		return ap
	}

	if a.Type.Width != b.Type.Width {
		return a.Type.Width > b.Type.Width
	}

	return a.Sym.Name < b.Sym.Name
}

// byStackVar implements sort.Interface for []*Node using cmpstackvarlt.
type byStackVar []*Node

func (s byStackVar) Len() int           { return len(s) }
func (s byStackVar) Less(i, j int) bool { return cmpstackvarlt(s[i], s[j]) }
func (s byStackVar) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }

func (s *ssafn) AllocFrame(f *ssa.Func) {
	s.stksize = 0
	s.stkptrsize = 0
	fn := s.curfn.Func

	// Mark the PAUTO's unused.
	for _, ln := range fn.Dcl {
		if ln.Class == PAUTO {
			ln.SetUsed(false)
		}
	}

	for _, l := range f.RegAlloc {
		if ls, ok := l.(ssa.LocalSlot); ok {
			ls.N.(*Node).SetUsed(true)
		}
	}

	scratchUsed := false
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch a := v.Aux.(type) {
			case *ssa.ArgSymbol:
				n := a.Node.(*Node)
				// Don't modify nodfp; it is a global.
				if n != nodfp {
					n.SetUsed(true)
				}
			case *ssa.AutoSymbol:
				a.Node.(*Node).SetUsed(true)
			}

			if !scratchUsed {
				scratchUsed = v.Op.UsesScratch()
			}
		}
	}

	if f.Config.NeedsFpScratch && scratchUsed {
		s.scratchFpMem = tempAt(src.NoXPos, s.curfn, types.Types[TUINT64])
	}

	sort.Sort(byStackVar(fn.Dcl))

	// Reassign stack offsets of the locals that are used.
	for i, n := range fn.Dcl {
		if n.Op != ONAME || n.Class != PAUTO {
			continue
		}
		if !n.Used() {
			fn.Dcl = fn.Dcl[:i]
			break
		}

		dowidth(n.Type)
		w := n.Type.Width
		if w >= thearch.MAXWIDTH || w < 0 {
			Fatalf("bad width")
		}
		s.stksize += w
		s.stksize = Rnd(s.stksize, int64(n.Type.Align))
		if types.Haspointers(n.Type) {
			s.stkptrsize = s.stksize
		}
		if thearch.LinkArch.InFamily(sys.MIPS, sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64, sys.S390X) {
			s.stksize = Rnd(s.stksize, int64(Widthptr))
		}
		n.Xoffset = -s.stksize
	}

	s.stksize = Rnd(s.stksize, int64(Widthreg))
	s.stkptrsize = Rnd(s.stkptrsize, int64(Widthreg))
}

func compile(fn *Node) {
	Curfn = fn
	dowidth(fn.Type)

	if fn.Nbody.Len() == 0 {
		emitptrargsmap()
		return
	}

	saveerrors()

	order(fn)
	if nerrors != 0 {
		return
	}

	walk(fn)
	if nerrors != 0 {
		return
	}
	checkcontrolflow(fn)
	if nerrors != 0 {
		return
	}
	if instrumenting {
		instrument(fn)
	}

	// From this point, there should be no uses of Curfn. Enforce that.
	Curfn = nil

	// Set up the function's LSym early to avoid data races with the assemblers.
	fn.Func.initLSym()

	// Build an SSA backend function.
	ssafn := buildssa(fn)
	pp := newProgs(fn)
	genssa(ssafn, pp)
	fieldtrack(pp.Text.From.Sym, fn.Func.FieldTrack)
	if pp.Text.To.Offset < 1<<31 {
		pp.Flush()
	} else {
		largeStackFrames = append(largeStackFrames, fn.Pos)
	}
	pp.Free()
}

func debuginfo(fnsym *obj.LSym, curfn interface{}) []*dwarf.Var {
	fn := curfn.(*Node)
	if expect := Linksym(fn.Func.Nname.Sym); fnsym != expect {
		Fatalf("unexpected fnsym: %v != %v", fnsym, expect)
	}

	var vars []*dwarf.Var
	for _, n := range fn.Func.Dcl {
		if n.Op != ONAME { // might be OTYPE or OLITERAL
			continue
		}

		var name obj.AddrName
		var abbrev int
		offs := n.Xoffset

		switch n.Class {
		case PAUTO:
			if !n.Used() {
				Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)")
			}
			name = obj.NAME_AUTO
			abbrev = dwarf.DW_ABRV_AUTO
			if Ctxt.FixedFrameSize() == 0 {
				offs -= int64(Widthptr)
			}
			if obj.Framepointer_enabled(obj.GOOS, obj.GOARCH) {
				offs -= int64(Widthptr)
			}

		case PPARAM, PPARAMOUT:
			name = obj.NAME_PARAM
			abbrev = dwarf.DW_ABRV_PARAM
			offs += Ctxt.FixedFrameSize()

		default:
			continue
		}

		gotype := Linksym(ngotype(n))
		fnsym.Autom = append(fnsym.Autom, &obj.Auto{
			Asym:    Ctxt.Lookup(n.Sym.Name, 0),
			Aoffset: int32(n.Xoffset),
			Name:    name,
			Gotype:  gotype,
		})

		if n.IsAutoTmp() {
			continue
		}

		typename := dwarf.InfoPrefix + gotype.Name[len("type."):]
		vars = append(vars, &dwarf.Var{
			Name:   n.Sym.Name,
			Abbrev: abbrev,
			Offset: int32(offs),
			Type:   Ctxt.Lookup(typename, 0),
		})
	}

	// Stable sort so that ties are broken with declaration order.
	sort.Stable(dwarf.VarsByOffset(vars))

	return vars
}

// fieldtrack adds R_USEFIELD relocations to fnsym to record any
// struct fields that it used.
func fieldtrack(fnsym *obj.LSym, tracked map[*types.Sym]struct{}) {
	if fnsym == nil {
		return
	}
	if obj.Fieldtrack_enabled == 0 || len(tracked) == 0 {
		return
	}

	trackSyms := make([]*types.Sym, 0, len(tracked))
	for sym := range tracked {
		trackSyms = append(trackSyms, sym)
	}
	sort.Sort(symByName(trackSyms))
	for _, sym := range trackSyms {
		r := obj.Addrel(fnsym)
		r.Sym = Linksym(sym)
		r.Type = obj.R_USEFIELD
	}
}

type symByName []*types.Sym

func (a symByName) Len() int           { return len(a) }
func (a symByName) Less(i, j int) bool { return a[i].Name < a[j].Name }
func (a symByName) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }