go/src/cmd/link/internal/ld/pcln.go

665 lines
20 KiB
Go
Raw Normal View History

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ld
import (
"cmd/internal/obj"
"cmd/internal/objabi"
cmd/internal/src: cache prefixed filenames CL 37234 introduced string concatenation into some hot code. This CL does that work earlier and caches the result. Updates #19386 Performance impact vs master: name old time/op new time/op delta Template 223ms ± 5% 216ms ± 5% -2.98% (p=0.001 n=20+20) Unicode 98.7ms ± 4% 99.0ms ± 4% ~ (p=0.749 n=20+19) GoTypes 631ms ± 4% 626ms ± 4% ~ (p=0.253 n=20+20) Compiler 2.91s ± 1% 2.87s ± 3% -1.11% (p=0.005 n=18+20) SSA 4.48s ± 2% 4.36s ± 2% -2.77% (p=0.000 n=20+20) Flate 130ms ± 2% 129ms ± 6% ~ (p=0.428 n=19+20) GoParser 160ms ± 4% 157ms ± 3% -1.62% (p=0.005 n=20+18) Reflect 395ms ± 2% 394ms ± 4% ~ (p=0.445 n=20+20) Tar 120ms ± 5% 118ms ± 6% ~ (p=0.101 n=19+20) XML 224ms ± 3% 223ms ± 3% ~ (p=0.544 n=19+19) name old user-ns/op new user-ns/op delta Template 291user-ms ± 5% 265user-ms ± 5% -9.02% (p=0.000 n=20+19) Unicode 140user-ms ± 3% 139user-ms ± 8% ~ (p=0.904 n=20+20) GoTypes 844user-ms ± 3% 849user-ms ± 3% ~ (p=0.251 n=20+18) Compiler 4.06user-s ± 5% 3.98user-s ± 2% ~ (p=0.056 n=20+20) SSA 6.89user-s ± 5% 6.50user-s ± 3% -5.61% (p=0.000 n=20+20) Flate 164user-ms ± 5% 163user-ms ± 4% ~ (p=0.365 n=20+19) GoParser 206user-ms ± 6% 204user-ms ± 4% ~ (p=0.534 n=20+18) Reflect 501user-ms ± 4% 505user-ms ± 5% ~ (p=0.383 n=20+20) Tar 151user-ms ± 3% 152user-ms ± 7% ~ (p=0.798 n=17+20) XML 283user-ms ± 7% 280user-ms ± 5% ~ (p=0.301 n=20+20) name old alloc/op new alloc/op delta Template 42.5MB ± 0% 40.2MB ± 0% -5.59% (p=0.000 n=20+20) Unicode 31.7MB ± 0% 31.0MB ± 0% -2.19% (p=0.000 n=20+18) GoTypes 124MB ± 0% 117MB ± 0% -5.90% (p=0.000 n=20+20) Compiler 533MB ± 0% 490MB ± 0% -8.07% (p=0.000 n=20+20) SSA 989MB ± 0% 893MB ± 0% -9.74% (p=0.000 n=20+20) Flate 27.8MB ± 0% 26.1MB ± 0% -5.92% (p=0.000 n=20+20) GoParser 34.3MB ± 0% 32.1MB ± 0% -6.43% (p=0.000 n=19+20) Reflect 84.6MB ± 0% 81.4MB ± 0% -3.84% (p=0.000 n=20+20) Tar 28.8MB ± 0% 27.7MB ± 0% -3.89% (p=0.000 n=20+20) XML 47.2MB ± 0% 44.2MB ± 0% -6.45% (p=0.000 n=20+19) name old allocs/op new allocs/op delta Template 420k ± 1% 381k ± 1% -9.35% (p=0.000 n=20+20) Unicode 338k ± 1% 324k ± 1% -4.29% (p=0.000 n=20+19) GoTypes 1.28M ± 0% 1.15M ± 0% -10.30% (p=0.000 n=20+20) Compiler 5.06M ± 0% 4.41M ± 0% -12.92% (p=0.000 n=20+20) SSA 9.14M ± 0% 7.91M ± 0% -13.46% (p=0.000 n=19+20) Flate 267k ± 0% 241k ± 1% -9.53% (p=0.000 n=20+20) GoParser 347k ± 1% 312k ± 0% -10.15% (p=0.000 n=19+20) Reflect 1.07M ± 0% 1.00M ± 0% -6.86% (p=0.000 n=20+20) Tar 274k ± 1% 256k ± 1% -6.73% (p=0.000 n=20+20) XML 448k ± 0% 398k ± 0% -11.17% (p=0.000 n=20+18) Performance impact when applied together with CL 37234 atop CL 37234's parent commit (i.e. as if it were a part of CL 37234), to show that this commit makes CL 37234 completely performance-neutral: name old time/op new time/op delta Template 222ms ±14% 222ms ±14% ~ (p=1.000 n=14+15) Unicode 104ms ±18% 106ms ±18% ~ (p=0.650 n=13+14) GoTypes 653ms ± 7% 638ms ± 5% ~ (p=0.145 n=14+12) Compiler 3.10s ± 1% 3.13s ±10% ~ (p=1.000 n=2+2) SSA 4.73s ±11% 4.68s ±11% ~ (p=0.567 n=15+15) Flate 136ms ± 4% 133ms ± 7% ~ (p=0.231 n=12+14) GoParser 163ms ±11% 169ms ±10% ~ (p=0.352 n=14+14) Reflect 415ms ±15% 423ms ±20% ~ (p=0.715 n=15+14) Tar 133ms ±17% 130ms ±23% ~ (p=0.252 n=14+15) XML 236ms ±16% 235ms ±14% ~ (p=0.874 n=14+14) name old user-ns/op new user-ns/op delta Template 271user-ms ±10% 271user-ms ±10% ~ (p=0.780 n=14+15) Unicode 143user-ms ± 5% 146user-ms ±11% ~ (p=0.432 n=12+14) GoTypes 864user-ms ± 5% 866user-ms ± 9% ~ (p=0.905 n=14+13) Compiler 4.17user-s ± 1% 4.26user-s ± 7% ~ (p=1.000 n=2+2) SSA 6.79user-s ± 8% 6.79user-s ± 6% ~ (p=0.902 n=15+15) Flate 169user-ms ± 8% 164user-ms ± 5% -3.13% (p=0.014 n=14+14) GoParser 212user-ms ± 7% 217user-ms ±22% ~ (p=1.000 n=13+15) Reflect 521user-ms ± 7% 533user-ms ±15% ~ (p=0.511 n=14+14) Tar 165user-ms ±17% 161user-ms ±15% ~ (p=0.345 n=15+15) XML 294user-ms ±11% 292user-ms ±10% ~ (p=0.839 n=14+14) name old alloc/op new alloc/op delta Template 39.9MB ± 0% 39.9MB ± 0% ~ (p=0.621 n=15+14) Unicode 31.0MB ± 0% 31.0MB ± 0% ~ (p=0.098 n=13+15) GoTypes 117MB ± 0% 117MB ± 0% ~ (p=0.775 n=15+15) Compiler 488MB ± 0% 488MB ± 0% ~ (p=0.333 n=2+2) SSA 892MB ± 0% 892MB ± 0% +0.03% (p=0.000 n=15+15) Flate 26.1MB ± 0% 26.1MB ± 0% ~ (p=0.098 n=15+15) GoParser 31.8MB ± 0% 31.8MB ± 0% ~ (p=0.525 n=15+13) Reflect 81.2MB ± 0% 81.2MB ± 0% +0.06% (p=0.001 n=12+14) Tar 27.5MB ± 0% 27.5MB ± 0% ~ (p=0.595 n=15+15) XML 44.1MB ± 0% 44.1MB ± 0% ~ (p=0.486 n=15+15) name old allocs/op new allocs/op delta Template 378k ± 1% 378k ± 0% ~ (p=0.949 n=15+14) Unicode 324k ± 0% 324k ± 1% ~ (p=0.057 n=14+15) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.461 n=15+15) Compiler 4.39M ± 0% 4.39M ± 0% ~ (p=0.333 n=2+2) SSA 7.90M ± 0% 7.90M ± 0% +0.06% (p=0.008 n=15+15) Flate 240k ± 1% 241k ± 0% ~ (p=0.233 n=15+15) GoParser 309k ± 1% 309k ± 0% ~ (p=0.867 n=15+12) Reflect 1.00M ± 0% 1.00M ± 0% ~ (p=0.139 n=12+15) Tar 254k ± 1% 253k ± 1% ~ (p=0.345 n=15+15) XML 398k ± 0% 397k ± 1% ~ (p=0.267 n=15+15) Change-Id: Ic999a0f456a371c99eebba0f9747263a13836e33 Reviewed-on: https://go-review.googlesource.com/37766 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2017-03-04 07:09:33 -08:00
"cmd/internal/src"
"cmd/internal/sys"
"cmd/link/internal/loader"
"cmd/link/internal/sym"
"encoding/binary"
cmd/compile, cmd/link, runtime: make defers low-cost through inline code and extra funcdata Generate inline code at defer time to save the args of defer calls to unique (autotmp) stack slots, and generate inline code at exit time to check which defer calls were made and make the associated function/method/interface calls. We remember that a particular defer statement was reached by storing in the deferBits variable (always stored on the stack). At exit time, we check the bits of the deferBits variable to determine which defer function calls to make (in reverse order). These low-cost defers are only used for functions where no defers appear in loops. In addition, we don't do these low-cost defers if there are too many defer statements or too many exits in a function (to limit code increase). When a function uses open-coded defers, we produce extra FUNCDATA_OpenCodedDeferInfo information that specifies the number of defers, and for each defer, the stack slots where the closure and associated args have been stored. The funcdata also includes the location of the deferBits variable. Therefore, for panics, we can use this funcdata to determine exactly which defers are active, and call the appropriate functions/methods/closures with the correct arguments for each active defer. In order to unwind the stack correctly after a recover(), we need to add an extra code segment to functions with open-coded defers that simply calls deferreturn() and returns. This segment is not reachable by the normal function, but is returned to by the runtime during recovery. We set the liveness information of this deferreturn() to be the same as the liveness at the first function call during the last defer exit code (so all return values and all stack slots needed by the defer calls will be live). I needed to increase the stackguard constant from 880 to 896, because of a small amount of new code in deferreturn(). The -N flag disables open-coded defers. '-d defer' prints out the kind of defer being used at each defer statement (heap-allocated, stack-allocated, or open-coded). Cost of defer statement [ go test -run NONE -bench BenchmarkDefer$ runtime ] With normal (stack-allocated) defers only: 35.4 ns/op With open-coded defers: 5.6 ns/op Cost of function call alone (remove defer keyword): 4.4 ns/op Text size increase (including funcdata) for go binary without/with open-coded defers: 0.09% The average size increase (including funcdata) for only the functions that use open-coded defers is 1.1%. The cost of a panic followed by a recover got noticeably slower, since panic processing now requires a scan of the stack for open-coded defer frames. This scan is required, even if no frames are using open-coded defers: Cost of panic and recover [ go test -run NONE -bench BenchmarkPanicRecover runtime ] Without open-coded defers: 62.0 ns/op With open-coded defers: 255 ns/op A CGO Go-to-C-to-Go benchmark got noticeably faster because of open-coded defers: CGO Go-to-C-to-Go benchmark [cd misc/cgo/test; go test -run NONE -bench BenchmarkCGoCallback ] Without open-coded defers: 443 ns/op With open-coded defers: 347 ns/op Updates #14939 (defer performance) Updates #34481 (design doc) Change-Id: I63b1a60d1ebf28126f55ee9fd7ecffe9cb23d1ff Reviewed-on: https://go-review.googlesource.com/c/go/+/202340 Reviewed-by: Austin Clements <austin@google.com>
2019-06-24 12:59:22 -07:00
"fmt"
"log"
"os"
"path/filepath"
"strings"
)
// pclnState holds state information used during pclntab generation.
// Here 'ldr' is just a pointer to the context's loader, 'container'
// is a bitmap holding whether a given symbol index is an outer or
// container symbol, 'deferReturnSym' is the index for the symbol
// "runtime.deferreturn", 'nameToOffset' is a helper function for
// capturing function names, 'numberedFiles' records the file number
// assigned to a given file symbol, 'filepaths' is a slice of
// expanded paths (indexed by file number).
type pclnState struct {
ldr *loader.Loader
container loader.Bitmap
deferReturnSym loader.Sym
nameToOffset func(name string) int32
numberedFiles map[loader.Sym]int64
filepaths []string
}
func makepclnState(ctxt *Link) pclnState {
ldr := ctxt.loader
drs := ldr.Lookup("runtime.deferreturn", sym.SymVerABIInternal)
return pclnState{
container: loader.MakeBitmap(ldr.NSym()),
ldr: ldr,
deferReturnSym: drs,
numberedFiles: make(map[loader.Sym]int64),
// NB: initial entry in filepaths below is to reserve the zero value,
// so that when we do a map lookup in numberedFiles fails, it will not
// return a value slot in filepaths.
filepaths: []string{""},
}
}
func (state *pclnState) ftabaddstring(ftab *loader.SymbolBuilder, s string) int32 {
start := len(ftab.Data())
ftab.Grow(int64(start + len(s) + 1)) // make room for s plus trailing NUL
ftd := ftab.Data()
copy(ftd[start:], s)
return int32(start)
}
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
// numberfile assigns a file number to the file if it hasn't been assigned already.
func (state *pclnState) numberfile(file loader.Sym) int64 {
if val, ok := state.numberedFiles[file]; ok {
return val
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
}
sn := state.ldr.SymName(file)
path := sn[len(src.FileSymPrefix):]
val := int64(len(state.filepaths))
state.numberedFiles[file] = val
state.filepaths = append(state.filepaths, expandGoroot(path))
return val
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
}
func (state *pclnState) fileVal(file loader.Sym) int64 {
if val, ok := state.numberedFiles[file]; ok {
return val
}
panic("should have been numbered first")
}
func (state *pclnState) renumberfiles(ctxt *Link, fi loader.FuncInfo, d *sym.Pcdata) {
// Give files numbers.
nf := fi.NumFile()
for i := uint32(0); i < nf; i++ {
state.numberfile(fi.File(int(i)))
}
buf := make([]byte, binary.MaxVarintLen32)
newval := int32(-1)
var out sym.Pcdata
it := obj.NewPCIter(uint32(ctxt.Arch.MinLC))
for it.Init(d.P); !it.Done; it.Next() {
// value delta
oldval := it.Value
var val int32
if oldval == -1 {
val = -1
} else {
if oldval < 0 || oldval >= int32(nf) {
log.Fatalf("bad pcdata %d", oldval)
}
val = int32(state.fileVal(fi.File(int(oldval))))
}
dv := val - newval
newval = val
// value
n := binary.PutVarint(buf, int64(dv))
out.P = append(out.P, buf[:n]...)
// pc delta
pc := (it.NextPC - it.PC) / it.PCScale
n = binary.PutUvarint(buf, uint64(pc))
out.P = append(out.P, buf[:n]...)
}
// terminating value delta
// we want to write varint-encoded 0, which is just 0
out.P = append(out.P, 0)
*d = out
}
// onlycsymbol looks at a symbol's name to report whether this is a
// symbol that is referenced by C code
func onlycsymbol(sname string) bool {
switch sname {
case "_cgo_topofstack", "__cgo_topofstack", "_cgo_panic", "crosscall2":
return true
}
if strings.HasPrefix(sname, "_cgoexp_") {
return true
}
return false
}
func emitPcln(ctxt *Link, s loader.Sym, container loader.Bitmap) bool {
if ctxt.BuildMode == BuildModePlugin && ctxt.HeadType == objabi.Hdarwin && onlycsymbol(ctxt.loader.SymName(s)) {
return false
}
// We want to generate func table entries only for the "lowest
// level" symbols, not containers of subsymbols.
return !container.Has(s)
}
func (state *pclnState) computeDeferReturn(target *Target, s loader.Sym) uint32 {
deferreturn := uint32(0)
lastWasmAddr := uint32(0)
relocs := state.ldr.Relocs(s)
for ri := 0; ri < relocs.Count(); ri++ {
r := relocs.At2(ri)
if target.IsWasm() && r.Type() == objabi.R_ADDR {
// Wasm does not have a live variable set at the deferreturn
// call itself. Instead it has one identified by the
// resumption point immediately preceding the deferreturn.
// The wasm code has a R_ADDR relocation which is used to
// set the resumption point to PC_B.
lastWasmAddr = uint32(r.Add())
}
if r.Type().IsDirectCall() && r.Sym() == state.deferReturnSym {
if target.IsWasm() {
deferreturn = lastWasmAddr - 1
} else {
// Note: the relocation target is in the call instruction, but
// is not necessarily the whole instruction (for instance, on
// x86 the relocation applies to bytes [1:5] of the 5 byte call
// instruction).
deferreturn = uint32(r.Off())
switch target.Arch.Family {
case sys.AMD64, sys.I386:
deferreturn--
case sys.PPC64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64:
// no change
case sys.RISCV64:
// TODO(jsing): The JALR instruction is marked with
// R_CALLRISCV, whereas the actual reloc is currently
// one instruction earlier starting with the AUIPC.
deferreturn -= 4
case sys.S390X:
deferreturn -= 2
default:
panic(fmt.Sprint("Unhandled architecture:", target.Arch.Family))
}
}
break // only need one
}
}
return deferreturn
}
// genInlTreeSym generates the InlTree sym for a function with the
// specified FuncInfo.
func (state *pclnState) genInlTreeSym(fi loader.FuncInfo, arch *sys.Arch) loader.Sym {
ldr := state.ldr
its := ldr.CreateExtSym("", 0)
inlTreeSym := ldr.MakeSymbolUpdater(its)
// Note: the generated symbol is given a type of sym.SGOFUNC, as a
// signal to the symtab() phase that it needs to be grouped in with
// other similar symbols (gcdata, etc); the dodata() phase will
// eventually switch the type back to SRODATA.
inlTreeSym.SetType(sym.SGOFUNC)
ldr.SetAttrReachable(its, true)
ninl := fi.NumInlTree()
for i := 0; i < int(ninl); i++ {
call := fi.InlTree(i)
// Usually, call.File is already numbered since the file
// shows up in the Pcfile table. However, two inlined calls
// might overlap exactly so that only the innermost file
// appears in the Pcfile table. In that case, this assigns
// the outer file a number.
val := state.numberfile(call.File)
fn := ldr.SymName(call.Func)
nameoff := state.nameToOffset(fn)
inlTreeSym.SetUint16(arch, int64(i*20+0), uint16(call.Parent))
inlTreeSym.SetUint8(arch, int64(i*20+2), uint8(objabi.GetFuncID(fn, "")))
// byte 3 is unused
inlTreeSym.SetUint32(arch, int64(i*20+4), uint32(val))
inlTreeSym.SetUint32(arch, int64(i*20+8), uint32(call.Line))
inlTreeSym.SetUint32(arch, int64(i*20+12), uint32(nameoff))
inlTreeSym.SetUint32(arch, int64(i*20+16), uint32(call.ParentPC))
}
return its
}
// pclntab initializes the pclntab symbol with
// runtime function and file name information.
// These variables are used to initialize runtime.firstmoduledata, see symtab.go:symtab.
var pclntabNfunc int32
var pclntabFiletabOffset int32
var pclntabPclntabOffset int32
var pclntabFirstFunc *sym.Symbol
var pclntabLastFunc *sym.Symbol
var pclntabFirstFunc2 loader.Sym
var pclntabLastFunc2 loader.Sym
// pclntab generates the pcln table for the link output. Return value
// is a bitmap indexed by global symbol that marks 'container' text
// symbols, e.g. the set of all symbols X such that Outer(S) = X for
// some other text symbol S.
func (ctxt *Link) pclntab() loader.Bitmap {
funcdataBytes := int64(0)
ldr := ctxt.loader
ftabsym := ldr.LookupOrCreateSym("runtime.pclntab", 0)
ftab := ldr.MakeSymbolUpdater(ftabsym)
ftab.SetType(sym.SPCLNTAB)
ldr.SetAttrReachable(ftabsym, true)
state := makepclnState(ctxt)
// See golang.org/s/go12symtab for the format. Briefly:
// 8-byte header
// nfunc [thearch.ptrsize bytes]
// function table, alternating PC and offset to func struct [each entry thearch.ptrsize bytes]
// end PC [thearch.ptrsize bytes]
// offset to file table [4 bytes]
// Find container symbols and mark them as such.
for _, s := range ctxt.Textp2 {
outer := ldr.OuterSym(s)
if outer != 0 {
state.container.Set(outer)
}
}
// Gather some basic stats and info.
var nfunc int32
prevSect := ldr.SymSect(ctxt.Textp2[0])
for _, s := range ctxt.Textp2 {
if !emitPcln(ctxt, s, state.container) {
continue
}
nfunc++
if pclntabFirstFunc2 == 0 {
pclntabFirstFunc2 = s
}
ss := ldr.SymSect(s)
if ss != prevSect {
// With multiple text sections, the external linker may
// insert functions between the sections, which are not
// known by Go. This leaves holes in the PC range covered
// by the func table. We need to generate an entry to mark
// the hole.
nfunc++
prevSect = ss
}
}
pclntabNfunc = nfunc
ftab.Grow(8 + int64(ctxt.Arch.PtrSize) + int64(nfunc)*2*int64(ctxt.Arch.PtrSize) + int64(ctxt.Arch.PtrSize) + 4)
ftab.SetUint32(ctxt.Arch, 0, 0xfffffffb)
ftab.SetUint8(ctxt.Arch, 6, uint8(ctxt.Arch.MinLC))
ftab.SetUint8(ctxt.Arch, 7, uint8(ctxt.Arch.PtrSize))
ftab.SetUint(ctxt.Arch, 8, uint64(nfunc))
pclntabPclntabOffset = int32(8 + ctxt.Arch.PtrSize)
szHint := len(ctxt.Textp2) * 2
funcnameoff := make(map[string]int32, szHint)
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
nameToOffset := func(name string) int32 {
nameoff, ok := funcnameoff[name]
if !ok {
nameoff = state.ftabaddstring(ftab, name)
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
funcnameoff[name] = nameoff
}
return nameoff
}
state.nameToOffset = nameToOffset
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
pctaboff := make(map[string]uint32, szHint)
writepctab := func(off int32, p []byte) int32 {
start, ok := pctaboff[string(p)]
if !ok {
if len(p) > 0 {
start = uint32(len(ftab.Data()))
ftab.AddBytes(p)
}
pctaboff[string(p)] = start
}
newoff := int32(ftab.SetUint32(ctxt.Arch, int64(off), start))
return newoff
}
setAddr := (*loader.SymbolBuilder).SetAddrPlus
if ctxt.IsExe() && ctxt.IsInternal() {
// Internal linking static executable. At this point the function
// addresses are known, so we can just use them instead of emitting
// relocations.
// For other cases we are generating a relocatable binary so we
// still need to emit relocations.
setAddr = func(s *loader.SymbolBuilder, arch *sys.Arch, off int64, tgt loader.Sym, add int64) int64 {
if v := ldr.SymValue(tgt); v != 0 {
return s.SetUint(arch, off, uint64(v+add))
}
return s.SetAddrPlus(arch, off, tgt, add)
}
}
pcsp := sym.Pcdata{}
pcfile := sym.Pcdata{}
pcline := sym.Pcdata{}
pcdata := []sym.Pcdata{}
funcdata := []loader.Sym{}
funcdataoff := []int64{}
nfunc = 0 // repurpose nfunc as a running index
prevFunc := ctxt.Textp2[0]
for _, s := range ctxt.Textp2 {
if !emitPcln(ctxt, s, state.container) {
continue
}
thisSect := ldr.SymSect(s)
prevSect := ldr.SymSect(prevFunc)
if thisSect != prevSect {
// With multiple text sections, there may be a hole here
// in the address space (see the comment above). We use an
// invalid funcoff value to mark the hole. See also
// runtime/symtab.go:findfunc
prevFuncSize := int64(ldr.SymSize(prevFunc))
setAddr(ftab, ctxt.Arch, 8+int64(ctxt.Arch.PtrSize)+int64(nfunc)*2*int64(ctxt.Arch.PtrSize), prevFunc, prevFuncSize)
ftab.SetUint(ctxt.Arch, 8+int64(ctxt.Arch.PtrSize)+int64(nfunc)*2*int64(ctxt.Arch.PtrSize)+int64(ctxt.Arch.PtrSize), ^uint64(0))
nfunc++
}
prevFunc = s
pcsp.P = pcsp.P[:0]
pcline.P = pcline.P[:0]
pcfile.P = pcfile.P[:0]
pcdata = pcdata[:0]
funcdataoff = funcdataoff[:0]
funcdata = funcdata[:0]
fi := ldr.FuncInfo(s)
if fi.Valid() {
fi.Preload()
npc := fi.NumPcdata()
for i := uint32(0); i < npc; i++ {
pcdata = append(pcdata, sym.Pcdata{P: fi.Pcdata(int(i))})
}
nfd := fi.NumFuncdataoff()
for i := uint32(0); i < nfd; i++ {
funcdataoff = append(funcdataoff, fi.Funcdataoff(int(i)))
}
funcdata = fi.Funcdata(funcdata)
}
if fi.Valid() && fi.NumInlTree() > 0 {
if len(pcdata) <= objabi.PCDATA_InlTreeIndex {
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
// Create inlining pcdata table.
newpcdata := make([]sym.Pcdata, objabi.PCDATA_InlTreeIndex+1)
copy(newpcdata, pcdata)
pcdata = newpcdata
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
}
if len(funcdataoff) <= objabi.FUNCDATA_InlTree {
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
// Create inline tree funcdata.
newfuncdata := make([]loader.Sym, objabi.FUNCDATA_InlTree+1)
newfuncdataoff := make([]int64, objabi.FUNCDATA_InlTree+1)
copy(newfuncdata, funcdata)
copy(newfuncdataoff, funcdataoff)
funcdata = newfuncdata
funcdataoff = newfuncdataoff
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
}
}
dSize := len(ftab.Data())
funcstart := int32(dSize)
funcstart += int32(-dSize) & (int32(ctxt.Arch.PtrSize) - 1) // align to ptrsize
setAddr(ftab, ctxt.Arch, 8+int64(ctxt.Arch.PtrSize)+int64(nfunc)*2*int64(ctxt.Arch.PtrSize), s, 0)
ftab.SetUint(ctxt.Arch, 8+int64(ctxt.Arch.PtrSize)+int64(nfunc)*2*int64(ctxt.Arch.PtrSize)+int64(ctxt.Arch.PtrSize), uint64(funcstart))
// Write runtime._func. Keep in sync with ../../../../runtime/runtime2.go:/_func
// and package debug/gosym.
// fixed size of struct, checked below
off := funcstart
end := funcstart + int32(ctxt.Arch.PtrSize) + 3*4 + 5*4 + int32(len(pcdata))*4 + int32(len(funcdata))*int32(ctxt.Arch.PtrSize)
if len(funcdata) > 0 && (end&int32(ctxt.Arch.PtrSize-1) != 0) {
end += 4
}
ftab.Grow(int64(end))
// entry uintptr
off = int32(setAddr(ftab, ctxt.Arch, int64(off), s, 0))
// name int32
sn := ldr.SymName(s)
nameoff := nameToOffset(sn)
off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(nameoff)))
// args int32
// TODO: Move into funcinfo.
args := uint32(0)
if fi.Valid() {
args = uint32(fi.Args())
}
off = int32(ftab.SetUint32(ctxt.Arch, int64(off), args))
// deferreturn
deferreturn := state.computeDeferReturn(&ctxt.Target, s)
off = int32(ftab.SetUint32(ctxt.Arch, int64(off), deferreturn))
if fi.Valid() {
pcsp = sym.Pcdata{P: fi.Pcsp()}
pcfile = sym.Pcdata{P: fi.Pcfile()}
pcline = sym.Pcdata{P: fi.Pcline()}
state.renumberfiles(ctxt, fi, &pcfile)
if false {
// Sanity check the new numbering
it := obj.NewPCIter(uint32(ctxt.Arch.MinLC))
for it.Init(pcfile.P); !it.Done; it.Next() {
if it.Value < 1 || it.Value > int32(len(state.numberedFiles)) {
ctxt.Errorf(s, "bad file number in pcfile: %d not in range [1, %d]\n", it.Value, len(state.numberedFiles))
errorexit()
}
}
}
}
if fi.Valid() && fi.NumInlTree() > 0 {
its := state.genInlTreeSym(fi, ctxt.Arch)
funcdata[objabi.FUNCDATA_InlTree] = its
pcdata[objabi.PCDATA_InlTreeIndex] = sym.Pcdata{P: fi.Pcinline()}
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
}
// pcdata
off = writepctab(off, pcsp.P)
off = writepctab(off, pcfile.P)
off = writepctab(off, pcline.P)
off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(len(pcdata))))
// funcID uint8
var file string
if fi.Valid() && fi.NumFile() > 0 {
filesymname := ldr.SymName(fi.File(0))
file = filesymname[len(src.FileSymPrefix):]
}
funcID := objabi.GetFuncID(sn, file)
off = int32(ftab.SetUint8(ctxt.Arch, int64(off), uint8(funcID)))
// unused
off += 2
// nfuncdata must be the final entry.
off = int32(ftab.SetUint8(ctxt.Arch, int64(off), uint8(len(funcdata))))
for i := range pcdata {
off = writepctab(off, pcdata[i].P)
}
// funcdata, must be pointer-aligned and we're only int32-aligned.
// Missing funcdata will be 0 (nil pointer).
if len(funcdata) > 0 {
if off&int32(ctxt.Arch.PtrSize-1) != 0 {
off += 4
}
for i := range funcdata {
dataoff := int64(off) + int64(ctxt.Arch.PtrSize)*int64(i)
if funcdata[i] == 0 {
ftab.SetUint(ctxt.Arch, dataoff, uint64(funcdataoff[i]))
continue
}
// TODO: Dedup.
funcdataBytes += int64(len(ldr.Data(funcdata[i])))
setAddr(ftab, ctxt.Arch, dataoff, funcdata[i], funcdataoff[i])
}
off += int32(len(funcdata)) * int32(ctxt.Arch.PtrSize)
}
if off != end {
ctxt.Errorf(s, "bad math in functab: funcstart=%d off=%d but end=%d (npcdata=%d nfuncdata=%d ptrsize=%d)", funcstart, off, end, len(pcdata), len(funcdata), ctxt.Arch.PtrSize)
errorexit()
}
nfunc++
}
last := ctxt.Textp2[len(ctxt.Textp2)-1]
pclntabLastFunc2 = last
// Final entry of table is just end pc.
setAddr(ftab, ctxt.Arch, 8+int64(ctxt.Arch.PtrSize)+int64(nfunc)*2*int64(ctxt.Arch.PtrSize), last, ldr.SymSize(last))
// Start file table.
dSize := len(ftab.Data())
start := int32(dSize)
start += int32(-dSize) & (int32(ctxt.Arch.PtrSize) - 1)
pclntabFiletabOffset = start
ftab.SetUint32(ctxt.Arch, 8+int64(ctxt.Arch.PtrSize)+int64(nfunc)*2*int64(ctxt.Arch.PtrSize)+int64(ctxt.Arch.PtrSize), uint32(start))
nf := len(state.numberedFiles)
ftab.Grow(int64(start) + int64((nf+1)*4))
ftab.SetUint32(ctxt.Arch, int64(start), uint32(nf+1))
for i := nf; i > 0; i-- {
path := state.filepaths[i]
val := int64(i)
ftab.SetUint32(ctxt.Arch, int64(start)+val*4, uint32(state.ftabaddstring(ftab, path)))
}
ftab.SetSize(int64(len(ftab.Data())))
ctxt.NumFilesyms = len(state.numberedFiles)
if ctxt.Debugvlog != 0 {
ctxt.Logf("pclntab=%d bytes, funcdata total %d bytes\n", ftab.Size(), funcdataBytes)
}
return state.container
}
cmd/link: set runtime.GOROOT default during link Suppose you build the Go toolchain in directory A, move the whole thing to directory B, and then use it from B to build a new program hello.exe, and then run hello.exe, and hello.exe crashes with a stack trace into the standard library. Long ago, you'd have seen hello.exe print file names in the A directory tree, even though the files had moved to the B directory tree. About two years ago we changed the compiler to write down these files with the name "$GOROOT" (that literal string) instead of A, so that the final link from B could replace "$GOROOT" with B, so that hello.exe's crash would show the correct source file paths in the stack trace. (golang.org/cl/18200) Now suppose that you do the same thing but hello.exe doesn't crash: it prints fmt.Println(runtime.GOROOT()). And you run hello.exe after clearing $GOROOT from the environment. Long ago, you'd have seen hello.exe print A instead of B. Before this CL, you'd still see hello.exe print A instead of B. This case is the one instance where a moved toolchain still divulges its origin. Not anymore. After this CL, hello.exe will print B, because the linker sets runtime/internal/sys.DefaultGoroot with the effective GOROOT from link time. This makes the default result of runtime.GOROOT once again match the file names recorded in the binary, after two years of divergence. With that cleared up, we can reintroduce GOROOT into the link action ID and also reenable TestExecutableGOROOT/RelocatedExe. When $GOROOT_FINAL is set during link, it is used in preference to $GOROOT, as always, but it was easier to explain the behavior above without introducing that complication. Fixes #22155. Fixes #20284. Fixes #22475. Change-Id: Ifdaeb77fd4678fdb337cf59ee25b2cd873ec1016 Reviewed-on: https://go-review.googlesource.com/86835 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2018-01-08 11:59:29 -05:00
func gorootFinal() string {
root := objabi.GOROOT
if final := os.Getenv("GOROOT_FINAL"); final != "" {
root = final
}
return root
}
func expandGoroot(s string) string {
const n = len("$GOROOT")
if len(s) >= n+1 && s[:n] == "$GOROOT" && (s[n] == '/' || s[n] == '\\') {
cmd/link: set runtime.GOROOT default during link Suppose you build the Go toolchain in directory A, move the whole thing to directory B, and then use it from B to build a new program hello.exe, and then run hello.exe, and hello.exe crashes with a stack trace into the standard library. Long ago, you'd have seen hello.exe print file names in the A directory tree, even though the files had moved to the B directory tree. About two years ago we changed the compiler to write down these files with the name "$GOROOT" (that literal string) instead of A, so that the final link from B could replace "$GOROOT" with B, so that hello.exe's crash would show the correct source file paths in the stack trace. (golang.org/cl/18200) Now suppose that you do the same thing but hello.exe doesn't crash: it prints fmt.Println(runtime.GOROOT()). And you run hello.exe after clearing $GOROOT from the environment. Long ago, you'd have seen hello.exe print A instead of B. Before this CL, you'd still see hello.exe print A instead of B. This case is the one instance where a moved toolchain still divulges its origin. Not anymore. After this CL, hello.exe will print B, because the linker sets runtime/internal/sys.DefaultGoroot with the effective GOROOT from link time. This makes the default result of runtime.GOROOT once again match the file names recorded in the binary, after two years of divergence. With that cleared up, we can reintroduce GOROOT into the link action ID and also reenable TestExecutableGOROOT/RelocatedExe. When $GOROOT_FINAL is set during link, it is used in preference to $GOROOT, as always, but it was easier to explain the behavior above without introducing that complication. Fixes #22155. Fixes #20284. Fixes #22475. Change-Id: Ifdaeb77fd4678fdb337cf59ee25b2cd873ec1016 Reviewed-on: https://go-review.googlesource.com/86835 Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2018-01-08 11:59:29 -05:00
return filepath.ToSlash(filepath.Join(gorootFinal(), s[n:]))
}
return s
}
const (
BUCKETSIZE = 256 * MINFUNC
SUBBUCKETS = 16
SUBBUCKETSIZE = BUCKETSIZE / SUBBUCKETS
NOIDX = 0x7fffffff
)
// findfunctab generates a lookup table to quickly find the containing
// function for a pc. See src/runtime/symtab.go:findfunc for details.
// 'container' is a bitmap indexed by global symbol holding whether
// a given text symbols is a container (outer sym).
func (ctxt *Link) findfunctab(container loader.Bitmap) {
ldr := ctxt.loader
tsym := ldr.LookupOrCreateSym("runtime.findfunctab", 0)
t := ldr.MakeSymbolUpdater(tsym)
t.SetType(sym.SRODATA)
ldr.SetAttrReachable(tsym, true)
ldr.SetAttrLocal(tsym, true)
// find min and max address
min := ldr.SymValue(ctxt.Textp2[0])
lastp := ctxt.Textp2[len(ctxt.Textp2)-1]
max := ldr.SymValue(lastp) + ldr.SymSize(lastp)
// for each subbucket, compute the minimum of all symbol indexes
// that map to that subbucket.
n := int32((max - min + SUBBUCKETSIZE - 1) / SUBBUCKETSIZE)
indexes := make([]int32, n)
for i := int32(0); i < n; i++ {
indexes[i] = NOIDX
}
idx := int32(0)
for i, s := range ctxt.Textp2 {
if !emitPcln(ctxt, s, container) {
continue
}
p := ldr.SymValue(s)
var e loader.Sym
i++
if i < len(ctxt.Textp2) {
e = ctxt.Textp2[i]
}
for e != 0 && !emitPcln(ctxt, e, container) && i < len(ctxt.Textp2) {
e = ctxt.Textp2[i]
i++
}
q := max
if e != 0 {
q = ldr.SymValue(e)
}
//print("%d: [%lld %lld] %s\n", idx, p, q, s->name);
for ; p < q; p += SUBBUCKETSIZE {
i = int((p - min) / SUBBUCKETSIZE)
if indexes[i] > idx {
indexes[i] = idx
}
}
i = int((q - 1 - min) / SUBBUCKETSIZE)
if indexes[i] > idx {
indexes[i] = idx
}
idx++
}
// allocate table
nbuckets := int32((max - min + BUCKETSIZE - 1) / BUCKETSIZE)
t.Grow(4*int64(nbuckets) + int64(n))
// fill in table
for i := int32(0); i < nbuckets; i++ {
base := indexes[i*SUBBUCKETS]
if base == NOIDX {
Errorf(nil, "hole in findfunctab")
}
t.SetUint32(ctxt.Arch, int64(i)*(4+SUBBUCKETS), uint32(base))
for j := int32(0); j < SUBBUCKETS && i*SUBBUCKETS+j < n; j++ {
idx = indexes[i*SUBBUCKETS+j]
if idx == NOIDX {
Errorf(nil, "hole in findfunctab")
}
if idx-base >= 256 {
Errorf(nil, "too many functions in a findfunc bucket! %d/%d %d %d", i, nbuckets, j, idx-base)
}
t.SetUint8(ctxt.Arch, int64(i)*(4+SUBBUCKETS)+4+int64(j), uint8(idx-base))
}
}
}