go/src/cmd/compile/internal/gc/obj.go
Austin Clements 9f95c9db23 cmd/compile, cmd/internal/obj: record register maps in binary
This adds FUNCDATA and PCDATA that records the register maps much like
the existing live arguments maps and live locals maps. The register
map is indexed independently from the argument and locals maps since
changes in register liveness tend not to correlate with changes to
argument and local liveness.

This is the final CL toward adding safe-points everywhere. The
following CLs will optimize liveness analysis to bring down the cost.
The effect of this CL is:

name        old time/op       new time/op       delta
Template          195ms ± 2%        197ms ± 1%    ~     (p=0.136 n=9+9)
Unicode          98.4ms ± 2%       99.7ms ± 1%  +1.39%  (p=0.004 n=10+10)
GoTypes           685ms ± 1%        700ms ± 1%  +2.06%  (p=0.000 n=9+9)
Compiler          3.28s ± 1%        3.34s ± 0%  +1.71%  (p=0.000 n=9+8)
SSA               7.79s ± 1%        7.91s ± 1%  +1.55%  (p=0.000 n=10+9)
Flate             133ms ± 2%        133ms ± 2%    ~     (p=0.190 n=10+10)
GoParser          161ms ± 2%        164ms ± 3%  +1.83%  (p=0.015 n=10+10)
Reflect           450ms ± 1%        457ms ± 1%  +1.62%  (p=0.000 n=10+10)
Tar               183ms ± 2%        185ms ± 1%  +0.91%  (p=0.008 n=9+10)
XML               234ms ± 1%        238ms ± 1%  +1.60%  (p=0.000 n=9+9)
[Geo mean]        411ms             417ms       +1.40%

name        old exe-bytes     new exe-bytes     delta
HelloSize         1.47M ± 0%        1.51M ± 0%  +2.79%  (p=0.000 n=10+10)

Compared to just before "cmd/internal/obj: consolidate emitting entry
stack map", the cumulative effect of adding stack maps everywhere and
register maps is:

name        old time/op       new time/op       delta
Template          185ms ± 2%        197ms ± 1%   +6.42%  (p=0.000 n=10+9)
Unicode          96.3ms ± 3%       99.7ms ± 1%   +3.60%  (p=0.000 n=10+10)
GoTypes           658ms ± 0%        700ms ± 1%   +6.37%  (p=0.000 n=10+9)
Compiler          3.14s ± 1%        3.34s ± 0%   +6.53%  (p=0.000 n=9+8)
SSA               7.41s ± 2%        7.91s ± 1%   +6.71%  (p=0.000 n=9+9)
Flate             126ms ± 1%        133ms ± 2%   +6.15%  (p=0.000 n=10+10)
GoParser          153ms ± 1%        164ms ± 3%   +6.89%  (p=0.000 n=10+10)
Reflect           437ms ± 1%        457ms ± 1%   +4.59%  (p=0.000 n=10+10)
Tar               178ms ± 1%        185ms ± 1%   +4.18%  (p=0.000 n=10+10)
XML               223ms ± 1%        238ms ± 1%   +6.39%  (p=0.000 n=10+9)
[Geo mean]        394ms             417ms        +5.78%

name        old alloc/op      new alloc/op      delta
Template         34.5MB ± 0%       38.0MB ± 0%  +10.19%  (p=0.000 n=10+10)
Unicode          29.3MB ± 0%       30.3MB ± 0%   +3.56%  (p=0.000 n=8+9)
GoTypes           113MB ± 0%        125MB ± 0%  +10.89%  (p=0.000 n=10+10)
Compiler          510MB ± 0%        575MB ± 0%  +12.79%  (p=0.000 n=10+10)
SSA              1.46GB ± 0%       1.64GB ± 0%  +12.40%  (p=0.000 n=10+10)
Flate            23.9MB ± 0%       25.9MB ± 0%   +8.56%  (p=0.000 n=10+10)
GoParser         28.0MB ± 0%       30.8MB ± 0%  +10.08%  (p=0.000 n=10+10)
Reflect          77.6MB ± 0%       84.3MB ± 0%   +8.63%  (p=0.000 n=10+10)
Tar              34.1MB ± 0%       37.0MB ± 0%   +8.44%  (p=0.000 n=10+10)
XML              42.7MB ± 0%       47.2MB ± 0%  +10.75%  (p=0.000 n=10+10)
[Geo mean]       76.0MB            83.3MB        +9.60%

name        old allocs/op     new allocs/op     delta
Template           321k ± 0%         337k ± 0%   +4.98%  (p=0.000 n=10+10)
Unicode            337k ± 0%         340k ± 0%   +1.04%  (p=0.000 n=10+9)
GoTypes           1.13M ± 0%        1.18M ± 0%   +4.85%  (p=0.000 n=10+10)
Compiler          4.67M ± 0%        4.96M ± 0%   +6.25%  (p=0.000 n=10+10)
SSA               11.7M ± 0%        12.3M ± 0%   +5.69%  (p=0.000 n=10+10)
Flate              216k ± 0%         226k ± 0%   +4.52%  (p=0.000 n=10+9)
GoParser           271k ± 0%         283k ± 0%   +4.52%  (p=0.000 n=10+10)
Reflect            927k ± 0%         972k ± 0%   +4.78%  (p=0.000 n=10+10)
Tar                318k ± 0%         333k ± 0%   +4.56%  (p=0.000 n=10+10)
XML                376k ± 0%         395k ± 0%   +5.04%  (p=0.000 n=10+10)
[Geo mean]         730k              764k        +4.61%

name        old exe-bytes     new exe-bytes     delta
HelloSize         1.46M ± 0%        1.51M ± 0%   +3.66%  (p=0.000 n=10+10)

For #24543.

Change-Id: I91e003dc64151916b384274884bf02a2d6862547
Reviewed-on: https://go-review.googlesource.com/109353
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-05-22 15:55:03 +00:00

485 lines
11 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import (
"cmd/compile/internal/types"
"cmd/internal/bio"
"cmd/internal/obj"
"cmd/internal/objabi"
"cmd/internal/src"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"strconv"
)
// architecture-independent object file output
const ArhdrSize = 60
func formathdr(arhdr []byte, name string, size int64) {
copy(arhdr[:], fmt.Sprintf("%-16s%-12d%-6d%-6d%-8o%-10d`\n", name, 0, 0, 0, 0644, size))
}
// These modes say which kind of object file to generate.
// The default use of the toolchain is to set both bits,
// generating a combined compiler+linker object, one that
// serves to describe the package to both the compiler and the linker.
// In fact the compiler and linker read nearly disjoint sections of
// that file, though, so in a distributed build setting it can be more
// efficient to split the output into two files, supplying the compiler
// object only to future compilations and the linker object only to
// future links.
//
// By default a combined object is written, but if -linkobj is specified
// on the command line then the default -o output is a compiler object
// and the -linkobj output is a linker object.
const (
modeCompilerObj = 1 << iota
modeLinkerObj
)
func dumpobj() {
if !dolinkobj {
dumpobj1(outfile, modeCompilerObj)
return
}
if linkobj == "" {
dumpobj1(outfile, modeCompilerObj|modeLinkerObj)
return
}
dumpobj1(outfile, modeCompilerObj)
dumpobj1(linkobj, modeLinkerObj)
}
func dumpobj1(outfile string, mode int) {
bout, err := bio.Create(outfile)
if err != nil {
flusherrors()
fmt.Printf("can't create %s: %v\n", outfile, err)
errorexit()
}
defer bout.Close()
bout.WriteString("!<arch>\n")
if mode&modeCompilerObj != 0 {
start := startArchiveEntry(bout)
dumpCompilerObj(bout)
finishArchiveEntry(bout, start, "__.PKGDEF")
}
if mode&modeLinkerObj != 0 {
start := startArchiveEntry(bout)
dumpLinkerObj(bout)
finishArchiveEntry(bout, start, "_go_.o")
}
}
func printObjHeader(bout *bio.Writer) {
fmt.Fprintf(bout, "go object %s %s %s %s\n", objabi.GOOS, objabi.GOARCH, objabi.Version, objabi.Expstring())
if buildid != "" {
fmt.Fprintf(bout, "build id %q\n", buildid)
}
if localpkg.Name == "main" {
fmt.Fprintf(bout, "main\n")
}
if safemode {
fmt.Fprintf(bout, "safe\n")
} else {
fmt.Fprintf(bout, "----\n") // room for some other tool to write "safe"
}
fmt.Fprintf(bout, "\n") // header ends with blank line
}
func startArchiveEntry(bout *bio.Writer) int64 {
var arhdr [ArhdrSize]byte
bout.Write(arhdr[:])
return bout.Offset()
}
func finishArchiveEntry(bout *bio.Writer, start int64, name string) {
bout.Flush()
size := bout.Offset() - start
if size&1 != 0 {
bout.WriteByte(0)
}
bout.Seek(start-ArhdrSize, 0)
var arhdr [ArhdrSize]byte
formathdr(arhdr[:], name, size)
bout.Write(arhdr[:])
bout.Flush()
bout.Seek(start+size+(size&1), 0)
}
func dumpCompilerObj(bout *bio.Writer) {
printObjHeader(bout)
dumpexport(bout)
}
func dumpLinkerObj(bout *bio.Writer) {
printObjHeader(bout)
if len(pragcgobuf) != 0 {
// write empty export section; must be before cgo section
fmt.Fprintf(bout, "\n$$\n\n$$\n\n")
fmt.Fprintf(bout, "\n$$ // cgo\n")
if err := json.NewEncoder(bout).Encode(pragcgobuf); err != nil {
Fatalf("serializing pragcgobuf: %v", err)
}
fmt.Fprintf(bout, "\n$$\n\n")
}
fmt.Fprintf(bout, "\n!\n")
externs := len(externdcl)
dumpglobls()
addptabs()
addsignats(externdcl)
dumpsignats()
dumptabs()
dumpimportstrings()
dumpbasictypes()
// Calls to dumpsignats can generate functions,
// like method wrappers and hash and equality routines.
// Compile any generated functions, process any new resulting types, repeat.
// This can't loop forever, because there is no way to generate an infinite
// number of types in a finite amount of code.
// In the typical case, we loop 0 or 1 times.
// It was not until issue 24761 that we found any code that required a loop at all.
for len(compilequeue) > 0 {
compileFunctions()
dumpsignats()
}
// Dump extra globals.
tmp := externdcl
if externdcl != nil {
externdcl = externdcl[externs:]
}
dumpglobls()
externdcl = tmp
if zerosize > 0 {
zero := mappkg.Lookup("zero")
ggloblsym(zero.Linksym(), int32(zerosize), obj.DUPOK|obj.RODATA)
}
addGCLocals()
obj.WriteObjFile(Ctxt, bout.Writer)
}
func addptabs() {
if !Ctxt.Flag_dynlink || localpkg.Name != "main" {
return
}
for _, exportn := range exportlist {
s := exportn.Sym
n := asNode(s.Def)
if n == nil {
continue
}
if n.Op != ONAME {
continue
}
if !types.IsExported(s.Name) {
continue
}
if s.Pkg.Name != "main" {
continue
}
if n.Type.Etype == TFUNC && n.Class() == PFUNC {
// function
ptabs = append(ptabs, ptabEntry{s: s, t: asNode(s.Def).Type})
} else {
// variable
ptabs = append(ptabs, ptabEntry{s: s, t: types.NewPtr(asNode(s.Def).Type)})
}
}
}
func dumpGlobal(n *Node) {
if n.Type == nil {
Fatalf("external %v nil type\n", n)
}
if n.Class() == PFUNC {
return
}
if n.Sym.Pkg != localpkg {
return
}
dowidth(n.Type)
ggloblnod(n)
}
func dumpGlobalConst(n *Node) {
// only export typed constants
t := n.Type
if t == nil {
return
}
if n.Sym.Pkg != localpkg {
return
}
// only export integer constants for now
switch t.Etype {
case TINT8:
case TINT16:
case TINT32:
case TINT64:
case TINT:
case TUINT8:
case TUINT16:
case TUINT32:
case TUINT64:
case TUINT:
case TUINTPTR:
// ok
case TIDEAL:
if !Isconst(n, CTINT) {
return
}
x := n.Val().U.(*Mpint)
if x.Cmp(minintval[TINT]) < 0 || x.Cmp(maxintval[TINT]) > 0 {
return
}
// Ideal integers we export as int (if they fit).
t = types.Types[TINT]
default:
return
}
Ctxt.DwarfIntConst(myimportpath, n.Sym.Name, typesymname(t), n.Int64())
}
func dumpglobls() {
// add globals
for _, n := range externdcl {
switch n.Op {
case ONAME:
dumpGlobal(n)
case OLITERAL:
dumpGlobalConst(n)
}
}
obj.SortSlice(funcsyms, func(i, j int) bool {
return funcsyms[i].LinksymName() < funcsyms[j].LinksymName()
})
for _, s := range funcsyms {
sf := s.Pkg.Lookup(funcsymname(s)).Linksym()
dsymptr(sf, 0, s.Linksym(), 0)
ggloblsym(sf, int32(Widthptr), obj.DUPOK|obj.RODATA)
}
// Do not reprocess funcsyms on next dumpglobls call.
funcsyms = nil
}
// addGCLocals adds gcargs and gclocals symbols to Ctxt.Data.
// It takes care not to add any duplicates.
// Though the object file format handles duplicates efficiently,
// storing only a single copy of the data,
// failure to remove these duplicates adds a few percent to object file size.
func addGCLocals() {
seen := make(map[string]bool)
for _, s := range Ctxt.Text {
if s.Func == nil {
continue
}
for _, gcsym := range []*obj.LSym{&s.Func.GCArgs, &s.Func.GCLocals, &s.Func.GCRegs} {
if seen[gcsym.Name] {
continue
}
Ctxt.Data = append(Ctxt.Data, gcsym)
seen[gcsym.Name] = true
}
}
}
func duintxx(s *obj.LSym, off int, v uint64, wid int) int {
if off&(wid-1) != 0 {
Fatalf("duintxxLSym: misaligned: v=%d wid=%d off=%d", v, wid, off)
}
s.WriteInt(Ctxt, int64(off), wid, int64(v))
return off + wid
}
func duint8(s *obj.LSym, off int, v uint8) int {
return duintxx(s, off, uint64(v), 1)
}
func duint16(s *obj.LSym, off int, v uint16) int {
return duintxx(s, off, uint64(v), 2)
}
func duint32(s *obj.LSym, off int, v uint32) int {
return duintxx(s, off, uint64(v), 4)
}
func duintptr(s *obj.LSym, off int, v uint64) int {
return duintxx(s, off, v, Widthptr)
}
func dbvec(s *obj.LSym, off int, bv bvec) int {
// Runtime reads the bitmaps as byte arrays. Oblige.
for j := 0; int32(j) < bv.n; j += 8 {
word := bv.b[j/32]
off = duint8(s, off, uint8(word>>(uint(j)%32)))
}
return off
}
func stringsym(pos src.XPos, s string) (data *obj.LSym) {
var symname string
if len(s) > 100 {
// Huge strings are hashed to avoid long names in object files.
// Indulge in some paranoia by writing the length of s, too,
// as protection against length extension attacks.
h := sha256.New()
io.WriteString(h, s)
symname = fmt.Sprintf(".gostring.%d.%x", len(s), h.Sum(nil))
} else {
// Small strings get named directly by their contents.
symname = strconv.Quote(s)
}
const prefix = "go.string."
symdataname := prefix + symname
symdata := Ctxt.Lookup(symdataname)
if !symdata.SeenGlobl() {
// string data
off := dsname(symdata, 0, s, pos, "string")
ggloblsym(symdata, int32(off), obj.DUPOK|obj.RODATA|obj.LOCAL)
}
return symdata
}
var slicebytes_gen int
func slicebytes(nam *Node, s string, len int) {
slicebytes_gen++
symname := fmt.Sprintf(".gobytes.%d", slicebytes_gen)
sym := localpkg.Lookup(symname)
sym.Def = asTypesNode(newname(sym))
lsym := sym.Linksym()
off := dsname(lsym, 0, s, nam.Pos, "slice")
ggloblsym(lsym, int32(off), obj.NOPTR|obj.LOCAL)
if nam.Op != ONAME {
Fatalf("slicebytes %v", nam)
}
nsym := nam.Sym.Linksym()
off = int(nam.Xoffset)
off = dsymptr(nsym, off, lsym, 0)
off = duintptr(nsym, off, uint64(len))
duintptr(nsym, off, uint64(len))
}
func dsname(s *obj.LSym, off int, t string, pos src.XPos, what string) int {
// Objects that are too large will cause the data section to overflow right away,
// causing a cryptic error message by the linker. Check for oversize objects here
// and provide a useful error message instead.
if int64(len(t)) > 2e9 {
yyerrorl(pos, "%v with length %v is too big", what, len(t))
return 0
}
s.WriteString(Ctxt, int64(off), len(t), t)
return off + len(t)
}
func dsymptr(s *obj.LSym, off int, x *obj.LSym, xoff int) int {
off = int(Rnd(int64(off), int64(Widthptr)))
s.WriteAddr(Ctxt, int64(off), Widthptr, x, int64(xoff))
off += Widthptr
return off
}
func dsymptrOff(s *obj.LSym, off int, x *obj.LSym) int {
s.WriteOff(Ctxt, int64(off), x, 0)
off += 4
return off
}
func dsymptrWeakOff(s *obj.LSym, off int, x *obj.LSym) int {
s.WriteWeakOff(Ctxt, int64(off), x, 0)
off += 4
return off
}
func gdata(nam *Node, nr *Node, wid int) {
if nam.Op != ONAME {
Fatalf("gdata nam op %v", nam.Op)
}
if nam.Sym == nil {
Fatalf("gdata nil nam sym")
}
s := nam.Sym.Linksym()
switch nr.Op {
case OLITERAL:
switch u := nr.Val().U.(type) {
case bool:
i := int64(obj.Bool2int(u))
s.WriteInt(Ctxt, nam.Xoffset, wid, i)
case *Mpint:
s.WriteInt(Ctxt, nam.Xoffset, wid, u.Int64())
case *Mpflt:
f := u.Float64()
switch nam.Type.Etype {
case TFLOAT32:
s.WriteFloat32(Ctxt, nam.Xoffset, float32(f))
case TFLOAT64:
s.WriteFloat64(Ctxt, nam.Xoffset, f)
}
case *Mpcplx:
r := u.Real.Float64()
i := u.Imag.Float64()
switch nam.Type.Etype {
case TCOMPLEX64:
s.WriteFloat32(Ctxt, nam.Xoffset, float32(r))
s.WriteFloat32(Ctxt, nam.Xoffset+4, float32(i))
case TCOMPLEX128:
s.WriteFloat64(Ctxt, nam.Xoffset, r)
s.WriteFloat64(Ctxt, nam.Xoffset+8, i)
}
case string:
symdata := stringsym(nam.Pos, u)
s.WriteAddr(Ctxt, nam.Xoffset, Widthptr, symdata, 0)
s.WriteInt(Ctxt, nam.Xoffset+int64(Widthptr), Widthptr, int64(len(u)))
default:
Fatalf("gdata unhandled OLITERAL %v", nr)
}
case OADDR:
if nr.Left.Op != ONAME {
Fatalf("gdata ADDR left op %v", nr.Left.Op)
}
to := nr.Left
s.WriteAddr(Ctxt, nam.Xoffset, wid, to.Sym.Linksym(), to.Xoffset)
case ONAME:
if nr.Class() != PFUNC {
Fatalf("gdata NAME not PFUNC %d", nr.Class())
}
s.WriteAddr(Ctxt, nam.Xoffset, wid, funcsym(nr.Sym).Linksym(), nr.Xoffset)
default:
Fatalf("gdata unhandled op %v %v\n", nr, nr.Op)
}
}