go/src/cmd/compile/internal/gc/pgen.go
Josh Bleecher Snyder 5c359d8083 cmd/compile: add Prog cache to Progs
The existing bulk/cached Prog allocator, Ctxt.NewProg, is not concurrency-safe.
This CL moves Prog allocation to its clients, the compiler and the assembler.

The assembler is so fast and generates so few Progs that it does not need
optimization of Prog allocation. I could not generate measureable changes.
And even if I could, the assembly is a miniscule portion of build times.

The compiler already has a natural place to manage Prog allocation;
this CL migrates the Prog cache there.
It will be made concurrency-safe in a later CL by
partitioning the Prog cache into chunks and assigning each chunk
to a different goroutine to manage.

This CL does cause a performance degradation when the compiler
is invoked with the -S flag (to dump assembly).
However, such usage is rare and almost always done manually.
The one instance I know of in a test is TestAssembly
in cmd/compile/internal/gc, and I did not detect
a measurable performance impact there.

Passes toolstash-check -all.
Minor compiler performance impact.

Updates #15756

Performance impact from just this CL:

name        old time/op     new time/op     delta
Template        213ms ± 4%      213ms ± 4%    ~     (p=0.571 n=49+49)
Unicode        89.1ms ± 3%     89.4ms ± 3%    ~     (p=0.388 n=47+48)
GoTypes         581ms ± 2%      584ms ± 3%  +0.56%  (p=0.019 n=47+48)
SSA             6.48s ± 2%      6.53s ± 2%  +0.84%  (p=0.000 n=47+49)
Flate           128ms ± 4%      128ms ± 4%    ~     (p=0.832 n=49+49)
GoParser        152ms ± 3%      152ms ± 3%    ~     (p=0.815 n=48+47)
Reflect         371ms ± 4%      371ms ± 3%    ~     (p=0.617 n=50+47)
Tar             112ms ± 4%      112ms ± 3%    ~     (p=0.724 n=49+49)
XML             208ms ± 3%      208ms ± 4%    ~     (p=0.678 n=49+50)
[Geo mean]      284ms           285ms       +0.18%

name        old user-ns/op  new user-ns/op  delta
Template         251M ± 7%       252M ±11%    ~     (p=0.704 n=49+50)
Unicode          107M ± 7%       108M ± 5%  +1.25%  (p=0.036 n=50+49)
GoTypes          738M ± 3%       740M ± 3%    ~     (p=0.305 n=49+48)
SSA             8.83G ± 2%      8.86G ± 4%    ~     (p=0.098 n=47+50)
Flate            146M ± 6%       147M ± 3%    ~     (p=0.584 n=48+41)
GoParser         178M ± 6%       179M ± 5%  +0.93%  (p=0.036 n=49+48)
Reflect          441M ± 4%       446M ± 7%    ~     (p=0.218 n=44+49)
Tar              126M ± 5%       126M ± 5%    ~     (p=0.766 n=48+49)
XML              245M ± 5%       244M ± 4%    ~     (p=0.359 n=50+50)
[Geo mean]       341M            342M       +0.51%

Performance impact from this CL combined with its parent:

name        old time/op     new time/op     delta
Template        213ms ± 3%      214ms ± 4%    ~     (p=0.685 n=47+50)
Unicode        89.8ms ± 6%     90.5ms ± 6%    ~     (p=0.055 n=50+50)
GoTypes         584ms ± 3%      585ms ± 2%    ~     (p=0.710 n=49+47)
SSA             6.50s ± 2%      6.53s ± 2%  +0.39%  (p=0.011 n=46+50)
Flate           128ms ± 3%      128ms ± 4%    ~     (p=0.855 n=47+49)
GoParser        152ms ± 3%      152ms ± 3%    ~     (p=0.666 n=49+49)
Reflect         371ms ± 3%      372ms ± 3%    ~     (p=0.298 n=48+48)
Tar             112ms ± 5%      113ms ± 3%    ~     (p=0.107 n=49+49)
XML             208ms ± 3%      208ms ± 2%    ~     (p=0.881 n=50+49)
[Geo mean]      285ms           285ms       +0.26%

name        old user-ns/op  new user-ns/op  delta
Template         254M ± 9%       252M ± 8%    ~     (p=0.290 n=49+50)
Unicode          106M ± 6%       108M ± 7%  +1.44%  (p=0.034 n=50+50)
GoTypes          741M ± 4%       743M ± 4%    ~     (p=0.992 n=50+49)
SSA             8.86G ± 2%      8.83G ± 3%    ~     (p=0.158 n=47+49)
Flate            147M ± 4%       148M ± 5%    ~     (p=0.832 n=50+49)
GoParser         179M ± 5%       178M ± 5%    ~     (p=0.370 n=48+50)
Reflect          441M ± 6%       445M ± 7%    ~     (p=0.246 n=45+47)
Tar              126M ± 6%       126M ± 6%    ~     (p=0.815 n=49+50)
XML              244M ± 3%       245M ± 4%    ~     (p=0.190 n=50+50)
[Geo mean]       342M            342M       +0.17%

Change-Id: I020f1c079d495fbe2e15ccb51e1ea2cc1b5a1855
Reviewed-on: https://go-review.googlesource.com/39634
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-06 04:53:50 +00:00

406 lines
10 KiB
Go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import (
"cmd/compile/internal/ssa"
"cmd/internal/dwarf"
"cmd/internal/obj"
"cmd/internal/src"
"cmd/internal/sys"
"fmt"
"sort"
)
// "Portable" code generation.
func makefuncdatasym(pp *Progs, nameprefix string, funcdatakind int64, curfn *Node) *Sym {
// This symbol requires a unique, reproducible name;
// unique to avoid duplicate symbols,
// and reproducible for reproducible builds and toolstash.
// The function name will usually suffice.
suffix := curfn.Func.Nname.Sym.Name
if suffix == "_" {
// It is possible to have multiple functions called _,
// so in this rare case, use instead the function's position.
// This formatted string will be meaningless gibberish, but that's ok.
// It will be unique and reproducible, and it is rare anyway.
// Note that we can't just always use the position;
// it is possible to have multiple autogenerated functions at the same position.
// Fortunately, no autogenerated functions are called _.
if curfn.Pos == autogeneratedPos {
Fatalf("autogenerated func _")
}
suffix = fmt.Sprintf("%v", curfn.Pos)
}
// Add in the package path as well.
// When generating wrappers, we can end up compiling a function belonging
// to other packages, which might have a name that collides with one in our package.
symname := nameprefix + curfn.Func.Nname.Sym.Pkg.Path + "." + suffix
sym := lookup(symname)
p := pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, funcdatakind)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = Linksym(sym)
return sym
}
// TODO(mdempsky): Update to reference OpVar{Def,Kill,Live} instead
// and move to plive.go.
// VARDEF is an annotation for the liveness analysis, marking a place
// where a complete initialization (definition) of a variable begins.
// Since the liveness analysis can see initialization of single-word
// variables quite easy, gvardef is usually only called for multi-word
// or 'fat' variables, those satisfying isfat(n->type).
// However, gvardef is also called when a non-fat variable is initialized
// via a block move; the only time this happens is when you have
// return f()
// for a function with multiple return values exactly matching the return
// types of the current function.
//
// A 'VARDEF x' annotation in the instruction stream tells the liveness
// analysis to behave as though the variable x is being initialized at that
// point in the instruction stream. The VARDEF must appear before the
// actual (multi-instruction) initialization, and it must also appear after
// any uses of the previous value, if any. For example, if compiling:
//
// x = x[1:]
//
// it is important to generate code like:
//
// base, len, cap = pieces of x[1:]
// VARDEF x
// x = {base, len, cap}
//
// If instead the generated code looked like:
//
// VARDEF x
// base, len, cap = pieces of x[1:]
// x = {base, len, cap}
//
// then the liveness analysis would decide the previous value of x was
// unnecessary even though it is about to be used by the x[1:] computation.
// Similarly, if the generated code looked like:
//
// base, len, cap = pieces of x[1:]
// x = {base, len, cap}
// VARDEF x
//
// then the liveness analysis will not preserve the new value of x, because
// the VARDEF appears to have "overwritten" it.
//
// VARDEF is a bit of a kludge to work around the fact that the instruction
// stream is working on single-word values but the liveness analysis
// wants to work on individual variables, which might be multi-word
// aggregates. It might make sense at some point to look into letting
// the liveness analysis work on single-word values as well, although
// there are complications around interface values, slices, and strings,
// all of which cannot be treated as individual words.
//
// VARKILL is the opposite of VARDEF: it marks a value as no longer needed,
// even if its address has been taken. That is, a VARKILL annotation asserts
// that its argument is certainly dead, for use when the liveness analysis
// would not otherwise be able to deduce that fact.
func emitptrargsmap() {
if Curfn.Func.Nname.Sym.Name == "_" {
return
}
sym := lookup(fmt.Sprintf("%s.args_stackmap", Curfn.Func.Nname.Sym.Name))
nptr := int(Curfn.Type.ArgWidth() / int64(Widthptr))
bv := bvalloc(int32(nptr) * 2)
nbitmap := 1
if Curfn.Type.Results().NumFields() > 0 {
nbitmap = 2
}
off := duint32(sym, 0, uint32(nbitmap))
off = duint32(sym, off, uint32(bv.n))
var xoffset int64
if Curfn.IsMethod() {
xoffset = 0
onebitwalktype1(Curfn.Type.Recvs(), &xoffset, bv)
}
if Curfn.Type.Params().NumFields() > 0 {
xoffset = 0
onebitwalktype1(Curfn.Type.Params(), &xoffset, bv)
}
off = dbvec(sym, off, bv)
if Curfn.Type.Results().NumFields() > 0 {
xoffset = 0
onebitwalktype1(Curfn.Type.Results(), &xoffset, bv)
off = dbvec(sym, off, bv)
}
ggloblsym(sym, int32(off), obj.RODATA|obj.LOCAL)
}
// cmpstackvarlt reports whether the stack variable a sorts before b.
//
// Sort the list of stack variables. Autos after anything else,
// within autos, unused after used, within used, things with
// pointers first, zeroed things first, and then decreasing size.
// Because autos are laid out in decreasing addresses
// on the stack, pointers first, zeroed things first and decreasing size
// really means, in memory, things with pointers needing zeroing at
// the top of the stack and increasing in size.
// Non-autos sort on offset.
func cmpstackvarlt(a, b *Node) bool {
if (a.Class == PAUTO) != (b.Class == PAUTO) {
return b.Class == PAUTO
}
if a.Class != PAUTO {
return a.Xoffset < b.Xoffset
}
if a.Used() != b.Used() {
return a.Used()
}
ap := haspointers(a.Type)
bp := haspointers(b.Type)
if ap != bp {
return ap
}
ap = a.Name.Needzero()
bp = b.Name.Needzero()
if ap != bp {
return ap
}
if a.Type.Width != b.Type.Width {
return a.Type.Width > b.Type.Width
}
return a.Sym.Name < b.Sym.Name
}
// byStackvar implements sort.Interface for []*Node using cmpstackvarlt.
type byStackVar []*Node
func (s byStackVar) Len() int { return len(s) }
func (s byStackVar) Less(i, j int) bool { return cmpstackvarlt(s[i], s[j]) }
func (s byStackVar) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s *ssafn) AllocFrame(f *ssa.Func) {
s.stksize = 0
s.stkptrsize = 0
fn := s.curfn.Func
// Mark the PAUTO's unused.
for _, ln := range fn.Dcl {
if ln.Class == PAUTO {
ln.SetUsed(false)
}
}
for _, l := range f.RegAlloc {
if ls, ok := l.(ssa.LocalSlot); ok {
ls.N.(*Node).SetUsed(true)
}
}
scratchUsed := false
for _, b := range f.Blocks {
for _, v := range b.Values {
switch a := v.Aux.(type) {
case *ssa.ArgSymbol:
n := a.Node.(*Node)
// Don't modify nodfp; it is a global.
if n != nodfp {
n.SetUsed(true)
}
case *ssa.AutoSymbol:
a.Node.(*Node).SetUsed(true)
}
if !scratchUsed {
scratchUsed = v.Op.UsesScratch()
}
}
}
if f.Config.NeedsFpScratch && scratchUsed {
s.scratchFpMem = tempAt(src.NoXPos, s.curfn, Types[TUINT64])
}
sort.Sort(byStackVar(fn.Dcl))
// Reassign stack offsets of the locals that are used.
for i, n := range fn.Dcl {
if n.Op != ONAME || n.Class != PAUTO {
continue
}
if !n.Used() {
fn.Dcl = fn.Dcl[:i]
break
}
dowidth(n.Type)
w := n.Type.Width
if w >= thearch.MAXWIDTH || w < 0 {
Fatalf("bad width")
}
s.stksize += w
s.stksize = Rnd(s.stksize, int64(n.Type.Align))
if haspointers(n.Type) {
s.stkptrsize = s.stksize
}
if thearch.LinkArch.InFamily(sys.MIPS, sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64, sys.S390X) {
s.stksize = Rnd(s.stksize, int64(Widthptr))
}
n.Xoffset = -s.stksize
}
s.stksize = Rnd(s.stksize, int64(Widthreg))
s.stkptrsize = Rnd(s.stkptrsize, int64(Widthreg))
}
func compile(fn *Node) {
Curfn = fn
dowidth(fn.Type)
if fn.Nbody.Len() == 0 {
emitptrargsmap()
return
}
saveerrors()
order(fn)
if nerrors != 0 {
return
}
walk(fn)
if nerrors != 0 {
return
}
checkcontrolflow(fn)
if nerrors != 0 {
return
}
if instrumenting {
instrument(fn)
}
// From this point, there should be no uses of Curfn. Enforce that.
Curfn = nil
// Build an SSA backend function.
ssafn := buildssa(fn)
pp := newProgs(fn)
genssa(ssafn, pp)
fieldtrack(pp.Text.From.Sym, fn.Func.FieldTrack)
if pp.Text.To.Offset < 1<<31 {
pp.Flush()
} else {
largeStackFrames = append(largeStackFrames, fn.Pos)
}
pp.Free()
}
func debuginfo(fnsym *obj.LSym, curfn interface{}) []*dwarf.Var {
fn := curfn.(*Node)
if expect := Linksym(fn.Func.Nname.Sym); fnsym != expect {
Fatalf("unexpected fnsym: %v != %v", fnsym, expect)
}
var vars []*dwarf.Var
for _, n := range fn.Func.Dcl {
if n.Op != ONAME { // might be OTYPE or OLITERAL
continue
}
var name obj.AddrName
var abbrev int
offs := n.Xoffset
switch n.Class {
case PAUTO:
if !n.Used() {
Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)")
}
name = obj.NAME_AUTO
abbrev = dwarf.DW_ABRV_AUTO
if Ctxt.FixedFrameSize() == 0 {
offs -= int64(Widthptr)
}
if obj.Framepointer_enabled(obj.GOOS, obj.GOARCH) {
offs -= int64(Widthptr)
}
case PPARAM, PPARAMOUT:
name = obj.NAME_PARAM
abbrev = dwarf.DW_ABRV_PARAM
offs += Ctxt.FixedFrameSize()
default:
continue
}
gotype := Linksym(ngotype(n))
fnsym.Autom = append(fnsym.Autom, &obj.Auto{
Asym: obj.Linklookup(Ctxt, n.Sym.Name, 0),
Aoffset: int32(n.Xoffset),
Name: name,
Gotype: gotype,
})
if n.IsAutoTmp() {
continue
}
typename := dwarf.InfoPrefix + gotype.Name[len("type."):]
vars = append(vars, &dwarf.Var{
Name: n.Sym.Name,
Abbrev: abbrev,
Offset: int32(offs),
Type: obj.Linklookup(Ctxt, typename, 0),
})
}
// Stable sort so that ties are broken with declaration order.
sort.Stable(dwarf.VarsByOffset(vars))
return vars
}
// fieldtrack adds R_USEFIELD relocations to fnsym to record any
// struct fields that it used.
func fieldtrack(fnsym *obj.LSym, tracked map[*Sym]struct{}) {
if fnsym == nil {
return
}
if obj.Fieldtrack_enabled == 0 || len(tracked) == 0 {
return
}
trackSyms := make([]*Sym, 0, len(tracked))
for sym := range tracked {
trackSyms = append(trackSyms, sym)
}
sort.Sort(symByName(trackSyms))
for _, sym := range trackSyms {
r := obj.Addrel(fnsym)
r.Sym = Linksym(sym)
r.Type = obj.R_USEFIELD
}
}
type symByName []*Sym
func (a symByName) Len() int { return len(a) }
func (a symByName) Less(i, j int) bool { return a[i].Name < a[j].Name }
func (a symByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }