mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
The existing bulk/cached Prog allocator, Ctxt.NewProg, is not concurrency-safe. This CL moves Prog allocation to its clients, the compiler and the assembler. The assembler is so fast and generates so few Progs that it does not need optimization of Prog allocation. I could not generate measureable changes. And even if I could, the assembly is a miniscule portion of build times. The compiler already has a natural place to manage Prog allocation; this CL migrates the Prog cache there. It will be made concurrency-safe in a later CL by partitioning the Prog cache into chunks and assigning each chunk to a different goroutine to manage. This CL does cause a performance degradation when the compiler is invoked with the -S flag (to dump assembly). However, such usage is rare and almost always done manually. The one instance I know of in a test is TestAssembly in cmd/compile/internal/gc, and I did not detect a measurable performance impact there. Passes toolstash-check -all. Minor compiler performance impact. Updates #15756 Performance impact from just this CL: name old time/op new time/op delta Template 213ms ± 4% 213ms ± 4% ~ (p=0.571 n=49+49) Unicode 89.1ms ± 3% 89.4ms ± 3% ~ (p=0.388 n=47+48) GoTypes 581ms ± 2% 584ms ± 3% +0.56% (p=0.019 n=47+48) SSA 6.48s ± 2% 6.53s ± 2% +0.84% (p=0.000 n=47+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.832 n=49+49) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.815 n=48+47) Reflect 371ms ± 4% 371ms ± 3% ~ (p=0.617 n=50+47) Tar 112ms ± 4% 112ms ± 3% ~ (p=0.724 n=49+49) XML 208ms ± 3% 208ms ± 4% ~ (p=0.678 n=49+50) [Geo mean] 284ms 285ms +0.18% name old user-ns/op new user-ns/op delta Template 251M ± 7% 252M ±11% ~ (p=0.704 n=49+50) Unicode 107M ± 7% 108M ± 5% +1.25% (p=0.036 n=50+49) GoTypes 738M ± 3% 740M ± 3% ~ (p=0.305 n=49+48) SSA 8.83G ± 2% 8.86G ± 4% ~ (p=0.098 n=47+50) Flate 146M ± 6% 147M ± 3% ~ (p=0.584 n=48+41) GoParser 178M ± 6% 179M ± 5% +0.93% (p=0.036 n=49+48) Reflect 441M ± 4% 446M ± 7% ~ (p=0.218 n=44+49) Tar 126M ± 5% 126M ± 5% ~ (p=0.766 n=48+49) XML 245M ± 5% 244M ± 4% ~ (p=0.359 n=50+50) [Geo mean] 341M 342M +0.51% Performance impact from this CL combined with its parent: name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.685 n=47+50) Unicode 89.8ms ± 6% 90.5ms ± 6% ~ (p=0.055 n=50+50) GoTypes 584ms ± 3% 585ms ± 2% ~ (p=0.710 n=49+47) SSA 6.50s ± 2% 6.53s ± 2% +0.39% (p=0.011 n=46+50) Flate 128ms ± 3% 128ms ± 4% ~ (p=0.855 n=47+49) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.666 n=49+49) Reflect 371ms ± 3% 372ms ± 3% ~ (p=0.298 n=48+48) Tar 112ms ± 5% 113ms ± 3% ~ (p=0.107 n=49+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.881 n=50+49) [Geo mean] 285ms 285ms +0.26% name old user-ns/op new user-ns/op delta Template 254M ± 9% 252M ± 8% ~ (p=0.290 n=49+50) Unicode 106M ± 6% 108M ± 7% +1.44% (p=0.034 n=50+50) GoTypes 741M ± 4% 743M ± 4% ~ (p=0.992 n=50+49) SSA 8.86G ± 2% 8.83G ± 3% ~ (p=0.158 n=47+49) Flate 147M ± 4% 148M ± 5% ~ (p=0.832 n=50+49) GoParser 179M ± 5% 178M ± 5% ~ (p=0.370 n=48+50) Reflect 441M ± 6% 445M ± 7% ~ (p=0.246 n=45+47) Tar 126M ± 6% 126M ± 6% ~ (p=0.815 n=49+50) XML 244M ± 3% 245M ± 4% ~ (p=0.190 n=50+50) [Geo mean] 342M 342M +0.17% Change-Id: I020f1c079d495fbe2e15ccb51e1ea2cc1b5a1855 Reviewed-on: https://go-review.googlesource.com/39634 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
406 lines
10 KiB
Go
406 lines
10 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package gc
|
|
|
|
import (
|
|
"cmd/compile/internal/ssa"
|
|
"cmd/internal/dwarf"
|
|
"cmd/internal/obj"
|
|
"cmd/internal/src"
|
|
"cmd/internal/sys"
|
|
"fmt"
|
|
"sort"
|
|
)
|
|
|
|
// "Portable" code generation.
|
|
|
|
func makefuncdatasym(pp *Progs, nameprefix string, funcdatakind int64, curfn *Node) *Sym {
|
|
// This symbol requires a unique, reproducible name;
|
|
// unique to avoid duplicate symbols,
|
|
// and reproducible for reproducible builds and toolstash.
|
|
// The function name will usually suffice.
|
|
suffix := curfn.Func.Nname.Sym.Name
|
|
if suffix == "_" {
|
|
// It is possible to have multiple functions called _,
|
|
// so in this rare case, use instead the function's position.
|
|
// This formatted string will be meaningless gibberish, but that's ok.
|
|
// It will be unique and reproducible, and it is rare anyway.
|
|
// Note that we can't just always use the position;
|
|
// it is possible to have multiple autogenerated functions at the same position.
|
|
// Fortunately, no autogenerated functions are called _.
|
|
if curfn.Pos == autogeneratedPos {
|
|
Fatalf("autogenerated func _")
|
|
}
|
|
suffix = fmt.Sprintf("%v", curfn.Pos)
|
|
}
|
|
// Add in the package path as well.
|
|
// When generating wrappers, we can end up compiling a function belonging
|
|
// to other packages, which might have a name that collides with one in our package.
|
|
symname := nameprefix + curfn.Func.Nname.Sym.Pkg.Path + "." + suffix
|
|
|
|
sym := lookup(symname)
|
|
p := pp.Prog(obj.AFUNCDATA)
|
|
Addrconst(&p.From, funcdatakind)
|
|
p.To.Type = obj.TYPE_MEM
|
|
p.To.Name = obj.NAME_EXTERN
|
|
p.To.Sym = Linksym(sym)
|
|
return sym
|
|
}
|
|
|
|
// TODO(mdempsky): Update to reference OpVar{Def,Kill,Live} instead
|
|
// and move to plive.go.
|
|
|
|
// VARDEF is an annotation for the liveness analysis, marking a place
|
|
// where a complete initialization (definition) of a variable begins.
|
|
// Since the liveness analysis can see initialization of single-word
|
|
// variables quite easy, gvardef is usually only called for multi-word
|
|
// or 'fat' variables, those satisfying isfat(n->type).
|
|
// However, gvardef is also called when a non-fat variable is initialized
|
|
// via a block move; the only time this happens is when you have
|
|
// return f()
|
|
// for a function with multiple return values exactly matching the return
|
|
// types of the current function.
|
|
//
|
|
// A 'VARDEF x' annotation in the instruction stream tells the liveness
|
|
// analysis to behave as though the variable x is being initialized at that
|
|
// point in the instruction stream. The VARDEF must appear before the
|
|
// actual (multi-instruction) initialization, and it must also appear after
|
|
// any uses of the previous value, if any. For example, if compiling:
|
|
//
|
|
// x = x[1:]
|
|
//
|
|
// it is important to generate code like:
|
|
//
|
|
// base, len, cap = pieces of x[1:]
|
|
// VARDEF x
|
|
// x = {base, len, cap}
|
|
//
|
|
// If instead the generated code looked like:
|
|
//
|
|
// VARDEF x
|
|
// base, len, cap = pieces of x[1:]
|
|
// x = {base, len, cap}
|
|
//
|
|
// then the liveness analysis would decide the previous value of x was
|
|
// unnecessary even though it is about to be used by the x[1:] computation.
|
|
// Similarly, if the generated code looked like:
|
|
//
|
|
// base, len, cap = pieces of x[1:]
|
|
// x = {base, len, cap}
|
|
// VARDEF x
|
|
//
|
|
// then the liveness analysis will not preserve the new value of x, because
|
|
// the VARDEF appears to have "overwritten" it.
|
|
//
|
|
// VARDEF is a bit of a kludge to work around the fact that the instruction
|
|
// stream is working on single-word values but the liveness analysis
|
|
// wants to work on individual variables, which might be multi-word
|
|
// aggregates. It might make sense at some point to look into letting
|
|
// the liveness analysis work on single-word values as well, although
|
|
// there are complications around interface values, slices, and strings,
|
|
// all of which cannot be treated as individual words.
|
|
//
|
|
// VARKILL is the opposite of VARDEF: it marks a value as no longer needed,
|
|
// even if its address has been taken. That is, a VARKILL annotation asserts
|
|
// that its argument is certainly dead, for use when the liveness analysis
|
|
// would not otherwise be able to deduce that fact.
|
|
|
|
func emitptrargsmap() {
|
|
if Curfn.Func.Nname.Sym.Name == "_" {
|
|
return
|
|
}
|
|
sym := lookup(fmt.Sprintf("%s.args_stackmap", Curfn.Func.Nname.Sym.Name))
|
|
|
|
nptr := int(Curfn.Type.ArgWidth() / int64(Widthptr))
|
|
bv := bvalloc(int32(nptr) * 2)
|
|
nbitmap := 1
|
|
if Curfn.Type.Results().NumFields() > 0 {
|
|
nbitmap = 2
|
|
}
|
|
off := duint32(sym, 0, uint32(nbitmap))
|
|
off = duint32(sym, off, uint32(bv.n))
|
|
var xoffset int64
|
|
if Curfn.IsMethod() {
|
|
xoffset = 0
|
|
onebitwalktype1(Curfn.Type.Recvs(), &xoffset, bv)
|
|
}
|
|
|
|
if Curfn.Type.Params().NumFields() > 0 {
|
|
xoffset = 0
|
|
onebitwalktype1(Curfn.Type.Params(), &xoffset, bv)
|
|
}
|
|
|
|
off = dbvec(sym, off, bv)
|
|
if Curfn.Type.Results().NumFields() > 0 {
|
|
xoffset = 0
|
|
onebitwalktype1(Curfn.Type.Results(), &xoffset, bv)
|
|
off = dbvec(sym, off, bv)
|
|
}
|
|
|
|
ggloblsym(sym, int32(off), obj.RODATA|obj.LOCAL)
|
|
}
|
|
|
|
// cmpstackvarlt reports whether the stack variable a sorts before b.
|
|
//
|
|
// Sort the list of stack variables. Autos after anything else,
|
|
// within autos, unused after used, within used, things with
|
|
// pointers first, zeroed things first, and then decreasing size.
|
|
// Because autos are laid out in decreasing addresses
|
|
// on the stack, pointers first, zeroed things first and decreasing size
|
|
// really means, in memory, things with pointers needing zeroing at
|
|
// the top of the stack and increasing in size.
|
|
// Non-autos sort on offset.
|
|
func cmpstackvarlt(a, b *Node) bool {
|
|
if (a.Class == PAUTO) != (b.Class == PAUTO) {
|
|
return b.Class == PAUTO
|
|
}
|
|
|
|
if a.Class != PAUTO {
|
|
return a.Xoffset < b.Xoffset
|
|
}
|
|
|
|
if a.Used() != b.Used() {
|
|
return a.Used()
|
|
}
|
|
|
|
ap := haspointers(a.Type)
|
|
bp := haspointers(b.Type)
|
|
if ap != bp {
|
|
return ap
|
|
}
|
|
|
|
ap = a.Name.Needzero()
|
|
bp = b.Name.Needzero()
|
|
if ap != bp {
|
|
return ap
|
|
}
|
|
|
|
if a.Type.Width != b.Type.Width {
|
|
return a.Type.Width > b.Type.Width
|
|
}
|
|
|
|
return a.Sym.Name < b.Sym.Name
|
|
}
|
|
|
|
// byStackvar implements sort.Interface for []*Node using cmpstackvarlt.
|
|
type byStackVar []*Node
|
|
|
|
func (s byStackVar) Len() int { return len(s) }
|
|
func (s byStackVar) Less(i, j int) bool { return cmpstackvarlt(s[i], s[j]) }
|
|
func (s byStackVar) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
|
|
func (s *ssafn) AllocFrame(f *ssa.Func) {
|
|
s.stksize = 0
|
|
s.stkptrsize = 0
|
|
fn := s.curfn.Func
|
|
|
|
// Mark the PAUTO's unused.
|
|
for _, ln := range fn.Dcl {
|
|
if ln.Class == PAUTO {
|
|
ln.SetUsed(false)
|
|
}
|
|
}
|
|
|
|
for _, l := range f.RegAlloc {
|
|
if ls, ok := l.(ssa.LocalSlot); ok {
|
|
ls.N.(*Node).SetUsed(true)
|
|
}
|
|
}
|
|
|
|
scratchUsed := false
|
|
for _, b := range f.Blocks {
|
|
for _, v := range b.Values {
|
|
switch a := v.Aux.(type) {
|
|
case *ssa.ArgSymbol:
|
|
n := a.Node.(*Node)
|
|
// Don't modify nodfp; it is a global.
|
|
if n != nodfp {
|
|
n.SetUsed(true)
|
|
}
|
|
case *ssa.AutoSymbol:
|
|
a.Node.(*Node).SetUsed(true)
|
|
}
|
|
|
|
if !scratchUsed {
|
|
scratchUsed = v.Op.UsesScratch()
|
|
}
|
|
}
|
|
}
|
|
|
|
if f.Config.NeedsFpScratch && scratchUsed {
|
|
s.scratchFpMem = tempAt(src.NoXPos, s.curfn, Types[TUINT64])
|
|
}
|
|
|
|
sort.Sort(byStackVar(fn.Dcl))
|
|
|
|
// Reassign stack offsets of the locals that are used.
|
|
for i, n := range fn.Dcl {
|
|
if n.Op != ONAME || n.Class != PAUTO {
|
|
continue
|
|
}
|
|
if !n.Used() {
|
|
fn.Dcl = fn.Dcl[:i]
|
|
break
|
|
}
|
|
|
|
dowidth(n.Type)
|
|
w := n.Type.Width
|
|
if w >= thearch.MAXWIDTH || w < 0 {
|
|
Fatalf("bad width")
|
|
}
|
|
s.stksize += w
|
|
s.stksize = Rnd(s.stksize, int64(n.Type.Align))
|
|
if haspointers(n.Type) {
|
|
s.stkptrsize = s.stksize
|
|
}
|
|
if thearch.LinkArch.InFamily(sys.MIPS, sys.MIPS64, sys.ARM, sys.ARM64, sys.PPC64, sys.S390X) {
|
|
s.stksize = Rnd(s.stksize, int64(Widthptr))
|
|
}
|
|
n.Xoffset = -s.stksize
|
|
}
|
|
|
|
s.stksize = Rnd(s.stksize, int64(Widthreg))
|
|
s.stkptrsize = Rnd(s.stkptrsize, int64(Widthreg))
|
|
}
|
|
|
|
func compile(fn *Node) {
|
|
Curfn = fn
|
|
dowidth(fn.Type)
|
|
|
|
if fn.Nbody.Len() == 0 {
|
|
emitptrargsmap()
|
|
return
|
|
}
|
|
|
|
saveerrors()
|
|
|
|
order(fn)
|
|
if nerrors != 0 {
|
|
return
|
|
}
|
|
|
|
walk(fn)
|
|
if nerrors != 0 {
|
|
return
|
|
}
|
|
checkcontrolflow(fn)
|
|
if nerrors != 0 {
|
|
return
|
|
}
|
|
if instrumenting {
|
|
instrument(fn)
|
|
}
|
|
|
|
// From this point, there should be no uses of Curfn. Enforce that.
|
|
Curfn = nil
|
|
|
|
// Build an SSA backend function.
|
|
ssafn := buildssa(fn)
|
|
pp := newProgs(fn)
|
|
genssa(ssafn, pp)
|
|
fieldtrack(pp.Text.From.Sym, fn.Func.FieldTrack)
|
|
if pp.Text.To.Offset < 1<<31 {
|
|
pp.Flush()
|
|
} else {
|
|
largeStackFrames = append(largeStackFrames, fn.Pos)
|
|
}
|
|
pp.Free()
|
|
}
|
|
|
|
func debuginfo(fnsym *obj.LSym, curfn interface{}) []*dwarf.Var {
|
|
fn := curfn.(*Node)
|
|
if expect := Linksym(fn.Func.Nname.Sym); fnsym != expect {
|
|
Fatalf("unexpected fnsym: %v != %v", fnsym, expect)
|
|
}
|
|
|
|
var vars []*dwarf.Var
|
|
for _, n := range fn.Func.Dcl {
|
|
if n.Op != ONAME { // might be OTYPE or OLITERAL
|
|
continue
|
|
}
|
|
|
|
var name obj.AddrName
|
|
var abbrev int
|
|
offs := n.Xoffset
|
|
|
|
switch n.Class {
|
|
case PAUTO:
|
|
if !n.Used() {
|
|
Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)")
|
|
}
|
|
name = obj.NAME_AUTO
|
|
|
|
abbrev = dwarf.DW_ABRV_AUTO
|
|
if Ctxt.FixedFrameSize() == 0 {
|
|
offs -= int64(Widthptr)
|
|
}
|
|
if obj.Framepointer_enabled(obj.GOOS, obj.GOARCH) {
|
|
offs -= int64(Widthptr)
|
|
}
|
|
|
|
case PPARAM, PPARAMOUT:
|
|
name = obj.NAME_PARAM
|
|
|
|
abbrev = dwarf.DW_ABRV_PARAM
|
|
offs += Ctxt.FixedFrameSize()
|
|
|
|
default:
|
|
continue
|
|
}
|
|
|
|
gotype := Linksym(ngotype(n))
|
|
fnsym.Autom = append(fnsym.Autom, &obj.Auto{
|
|
Asym: obj.Linklookup(Ctxt, n.Sym.Name, 0),
|
|
Aoffset: int32(n.Xoffset),
|
|
Name: name,
|
|
Gotype: gotype,
|
|
})
|
|
|
|
if n.IsAutoTmp() {
|
|
continue
|
|
}
|
|
|
|
typename := dwarf.InfoPrefix + gotype.Name[len("type."):]
|
|
vars = append(vars, &dwarf.Var{
|
|
Name: n.Sym.Name,
|
|
Abbrev: abbrev,
|
|
Offset: int32(offs),
|
|
Type: obj.Linklookup(Ctxt, typename, 0),
|
|
})
|
|
}
|
|
|
|
// Stable sort so that ties are broken with declaration order.
|
|
sort.Stable(dwarf.VarsByOffset(vars))
|
|
|
|
return vars
|
|
}
|
|
|
|
// fieldtrack adds R_USEFIELD relocations to fnsym to record any
|
|
// struct fields that it used.
|
|
func fieldtrack(fnsym *obj.LSym, tracked map[*Sym]struct{}) {
|
|
if fnsym == nil {
|
|
return
|
|
}
|
|
if obj.Fieldtrack_enabled == 0 || len(tracked) == 0 {
|
|
return
|
|
}
|
|
|
|
trackSyms := make([]*Sym, 0, len(tracked))
|
|
for sym := range tracked {
|
|
trackSyms = append(trackSyms, sym)
|
|
}
|
|
sort.Sort(symByName(trackSyms))
|
|
for _, sym := range trackSyms {
|
|
r := obj.Addrel(fnsym)
|
|
r.Sym = Linksym(sym)
|
|
r.Type = obj.R_USEFIELD
|
|
}
|
|
}
|
|
|
|
type symByName []*Sym
|
|
|
|
func (a symByName) Len() int { return len(a) }
|
|
func (a symByName) Less(i, j int) bool { return a[i].Name < a[j].Name }
|
|
func (a symByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|