go/src/cmd/link/internal/ld/objfile.go

549 lines
12 KiB
Go
Raw Normal View History

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ld
// Reading of Go object files.
import (
"bufio"
"bytes"
"cmd/internal/bio"
"cmd/internal/dwarf"
"cmd/internal/objabi"
"crypto/sha1"
"encoding/base64"
"io"
"log"
"strconv"
"strings"
)
const (
startmagic = "\x00\x00go19ld"
endmagic = "\xff\xffgo19ld"
)
var emptyPkg = []byte(`"".`)
// objReader reads Go object files.
type objReader struct {
rd *bufio.Reader
ctxt *Link
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
lib *Library
pn string
dupSym *Symbol
localSymVersion int
// rdBuf is used by readString and readSymName as scratch for reading strings.
rdBuf []byte
// List of symbol references for the file being read.
refs []*Symbol
data []byte
reloc []Reloc
pcdata []Pcdata
autom []Auto
funcdata []*Symbol
funcdataoff []int64
file []*Symbol
}
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
func LoadObjFile(ctxt *Link, f *bio.Reader, lib *Library, length int64, pn string) {
start := f.Offset()
r := &objReader{
rd: f.Reader,
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
lib: lib,
ctxt: ctxt,
pn: pn,
dupSym: &Symbol{Name: ".dup"},
localSymVersion: ctxt.Syms.IncVersion(),
}
r.loadObjFile()
if f.Offset() != start+length {
log.Fatalf("%s: unexpected end at %d, want %d", pn, f.Offset(), start+length)
}
}
func (r *objReader) loadObjFile() {
pkg := objabi.PathToPrefix(r.lib.Pkg)
// Magic header
var buf [8]uint8
r.readFull(buf[:])
if string(buf[:]) != startmagic {
log.Fatalf("%s: invalid file start %x %x %x %x %x %x %x %x", r.pn, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7])
}
// Version
c, err := r.rd.ReadByte()
if err != nil || c != 1 {
log.Fatalf("%s: invalid file version number %d", r.pn, c)
}
// Autolib
for {
lib := r.readString()
if lib == "" {
break
}
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
l := addlib(r.ctxt, pkg, r.pn, lib)
if l != nil {
r.lib.imports = append(r.lib.imports, l)
}
}
// Symbol references
r.refs = []*Symbol{nil} // zeroth ref is nil
for {
c, err := r.rd.Peek(1)
if err != nil {
log.Fatalf("%s: peeking: %v", r.pn, err)
}
if c[0] == 0xff {
r.rd.ReadByte()
break
}
r.readRef()
}
// Lengths
r.readSlices()
// Data section
r.readFull(r.data)
// Defined symbols
for {
c, err := r.rd.Peek(1)
if err != nil {
log.Fatalf("%s: peeking: %v", r.pn, err)
}
if c[0] == 0xff {
break
}
r.readSym()
}
// Magic footer
buf = [8]uint8{}
r.readFull(buf[:])
if string(buf[:]) != endmagic {
log.Fatalf("%s: invalid file end", r.pn)
}
}
func (r *objReader) readSlices() {
n := r.readInt()
r.data = make([]byte, n)
n = r.readInt()
r.reloc = make([]Reloc, n)
n = r.readInt()
r.pcdata = make([]Pcdata, n)
n = r.readInt()
r.autom = make([]Auto, n)
n = r.readInt()
r.funcdata = make([]*Symbol, n)
r.funcdataoff = make([]int64, n)
n = r.readInt()
r.file = make([]*Symbol, n)
}
// Symbols are prefixed so their content doesn't get confused with the magic footer.
const symPrefix = 0xfe
func (r *objReader) readSym() {
if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
log.Fatalln("readSym out of sync")
}
t := abiSymKindToSymKind[r.readInt()]
s := r.readSymIndex()
flags := r.readInt()
dupok := flags&1 != 0
local := flags&2 != 0
makeTypelink := flags&4 != 0
size := r.readInt()
typ := r.readSymIndex()
data := r.readData()
nreloc := r.readInt()
pkg := objabi.PathToPrefix(r.lib.Pkg)
isdup := false
var dup *Symbol
if s.Type != 0 && s.Type != SXREF {
if (t == SDATA || t == SBSS || t == SNOPTRBSS) && len(data) == 0 && nreloc == 0 {
if s.Size < int64(size) {
s.Size = int64(size)
}
if typ != nil && s.Gotype == nil {
s.Gotype = typ
}
return
}
if (s.Type == SDATA || s.Type == SBSS || s.Type == SNOPTRBSS) && len(s.P) == 0 && len(s.R) == 0 {
goto overwrite
}
if s.Type != SBSS && s.Type != SNOPTRBSS && !dupok && !s.Attr.DuplicateOK() {
log.Fatalf("duplicate symbol %s (types %d and %d) in %s and %s", s.Name, s.Type, t, s.File, r.pn)
}
if len(s.P) > 0 {
dup = s
s = r.dupSym
isdup = true
}
}
overwrite:
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
s.File = pkg
if dupok {
s.Attr |= AttrDuplicateOK
}
if t == SXREF {
log.Fatalf("bad sxref")
}
if t == 0 {
log.Fatalf("missing type for %s in %s", s.Name, r.pn)
}
if t == SBSS && (s.Type == SRODATA || s.Type == SNOPTRBSS) {
t = s.Type
}
s.Type = t
if s.Size < int64(size) {
s.Size = int64(size)
}
s.Attr.Set(AttrLocal, local)
s.Attr.Set(AttrMakeTypelink, makeTypelink)
if typ != nil {
s.Gotype = typ
}
if isdup && typ != nil { // if bss sym defined multiple times, take type from any one def
dup.Gotype = typ
}
s.P = data
if nreloc > 0 {
s.R = r.reloc[:nreloc:nreloc]
if !isdup {
r.reloc = r.reloc[nreloc:]
}
for i := 0; i < nreloc; i++ {
s.R[i] = Reloc{
Off: r.readInt32(),
Siz: r.readUint8(),
Type: objabi.RelocType(r.readInt32()),
Add: r.readInt64(),
Sym: r.readSymIndex(),
}
}
}
if s.Type == STEXT {
s.FuncInfo = new(FuncInfo)
pc := s.FuncInfo
pc.Args = r.readInt32()
pc.Locals = r.readInt32()
if r.readUint8() != 0 {
s.Attr |= AttrNoSplit
}
flags := r.readInt()
if flags&(1<<2) != 0 {
s.Attr |= AttrReflectMethod
}
n := r.readInt()
pc.Autom = r.autom[:n:n]
if !isdup {
r.autom = r.autom[n:]
}
for i := 0; i < n; i++ {
pc.Autom[i] = Auto{
Asym: r.readSymIndex(),
Aoffset: r.readInt32(),
Name: r.readInt16(),
Gotype: r.readSymIndex(),
}
}
pc.Pcsp.P = r.readData()
pc.Pcfile.P = r.readData()
pc.Pcline.P = r.readData()
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
pc.Pcinline.P = r.readData()
n = r.readInt()
pc.Pcdata = r.pcdata[:n:n]
if !isdup {
r.pcdata = r.pcdata[n:]
}
for i := 0; i < n; i++ {
pc.Pcdata[i].P = r.readData()
}
n = r.readInt()
pc.Funcdata = r.funcdata[:n:n]
pc.Funcdataoff = r.funcdataoff[:n:n]
if !isdup {
r.funcdata = r.funcdata[n:]
r.funcdataoff = r.funcdataoff[n:]
}
for i := 0; i < n; i++ {
pc.Funcdata[i] = r.readSymIndex()
}
for i := 0; i < n; i++ {
pc.Funcdataoff[i] = r.readInt64()
}
n = r.readInt()
pc.File = r.file[:n:n]
if !isdup {
r.file = r.file[n:]
}
for i := 0; i < n; i++ {
pc.File[i] = r.readSymIndex()
}
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
n = r.readInt()
pc.InlTree = make([]InlinedCall, n)
for i := 0; i < n; i++ {
pc.InlTree[i].Parent = r.readInt32()
pc.InlTree[i].File = r.readSymIndex()
pc.InlTree[i].Line = r.readInt32()
pc.InlTree[i].Func = r.readSymIndex()
}
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
if !dupok {
if s.Attr.OnList() {
log.Fatalf("symbol %s listed multiple times", s.Name)
}
s.Attr |= AttrOnList
cmd/link: insert trampolines for too-far jumps on ARM ARM direct CALL/JMP instruction has 24 bit offset, which can only encodes jumps within +/-32M. When the target is too far, the top bits get truncated and the program jumps wild. This CL detects too-far jumps and automatically insert trampolines, currently only internal linking on ARM. It is necessary to make the following changes to the linker: - Resolve direct jump relocs when assigning addresses to functions. this allows trampoline insertion without moving all code that already laid down. - Lay down packages in dependency order, so that when resolving a inter-package direct jump reloc, the target address is already known. Intra-package jumps are assumed never too far. - a linker flag -debugtramp is added for debugging trampolines: "-debugtramp=1 -v" prints trampoline debug message "-debugtramp=2" forces all inter-package jump to use trampolines (currently ARM only) "-debugtramp=2 -v" does both - Some data structures are changed for bookkeeping. On ARM, pseudo DIV/DIVU/MOD/MODU instructions now clobber R8 (unfortunate). In the standard library there is no ARM assembly code that uses these instructions, and the compiler no longer emits them (CL 29390). all.bash passes with -debugtramp=2, except a disassembly test (this is unavoidable as we changed the instruction). TBD: debug info of trampolines? Fixes #17028. Change-Id: Idcce347ea7e0af77c4079041a160b2f6e114b474 Reviewed-on: https://go-review.googlesource.com/29397 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-09-14 14:47:12 -04:00
r.lib.textp = append(r.lib.textp, s)
} else {
// there may ba a dup in another package
// put into a temp list and add to text later
if !isdup {
r.lib.dupTextSyms = append(r.lib.dupTextSyms, s)
} else {
r.lib.dupTextSyms = append(r.lib.dupTextSyms, dup)
}
}
}
if s.Type == SDWARFINFO {
r.patchDWARFName(s)
}
}
func (r *objReader) patchDWARFName(s *Symbol) {
// This is kind of ugly. Really the package name should not
// even be included here.
if s.Size < 1 || s.P[0] != dwarf.DW_ABRV_FUNCTION {
return
}
e := bytes.IndexByte(s.P, 0)
if e == -1 {
return
}
p := bytes.Index(s.P[:e], emptyPkg)
if p == -1 {
return
}
pkgprefix := []byte(objabi.PathToPrefix(r.lib.Pkg) + ".")
patched := bytes.Replace(s.P[:e], emptyPkg, pkgprefix, -1)
s.P = append(patched, s.P[e:]...)
delta := int64(len(s.P)) - s.Size
s.Size = int64(len(s.P))
for i := range s.R {
r := &s.R[i]
if r.Off > int32(e) {
r.Off += int32(delta)
}
}
}
func (r *objReader) readFull(b []byte) {
_, err := io.ReadFull(r.rd, b)
if err != nil {
log.Fatalf("%s: error reading %s", r.pn, err)
}
}
func (r *objReader) readRef() {
if c, err := r.rd.ReadByte(); c != symPrefix || err != nil {
log.Fatalf("readSym out of sync")
}
name := r.readSymName()
v := r.readInt()
if v != 0 && v != 1 {
cmd/internal/obj: rework gclocals handling The compiler handled gcargs and gclocals LSyms unusually. It generated placeholder symbols (makefuncdatasym), filled them in, and then renamed them for content-addressability. This is an important binary size optimization; the same locals information occurs over and over. This CL continues to treat these LSyms unusually, but in a slightly more explicit way, and importantly for concurrent compilation, in a way that does not require concurrent modification of Ctxt.Hash. Instead of creating gcargs and gclocals in the usual way, by creating a types.Sym and then an obj.LSym, we add them directly to obj.FuncInfo, initialize them in obj.InitTextSym, and deduplicate and add them to ctxt.Data at the end. Then the backend's job is simply to fill them in and rename them appropriately. Updates #15756 name old alloc/op new alloc/op delta Template 38.8MB ± 0% 38.7MB ± 0% -0.22% (p=0.016 n=5+5) Unicode 29.8MB ± 0% 29.8MB ± 0% ~ (p=0.690 n=5+5) GoTypes 113MB ± 0% 113MB ± 0% -0.24% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.24GB ± 0% -0.39% (p=0.008 n=5+5) Flate 25.3MB ± 0% 25.2MB ± 0% -0.43% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 31.7MB ± 0% -0.22% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 77.6MB ± 0% -0.80% (p=0.008 n=5+5) Tar 26.6MB ± 0% 26.3MB ± 0% -0.85% (p=0.008 n=5+5) XML 42.4MB ± 0% 41.9MB ± 0% -1.04% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 378k ± 0% 377k ± 1% ~ (p=0.151 n=5+5) Unicode 321k ± 1% 321k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% -0.47% (p=0.016 n=5+5) SSA 9.71M ± 0% 9.67M ± 0% -0.33% (p=0.008 n=5+5) Flate 233k ± 1% 232k ± 1% ~ (p=0.151 n=5+5) GoParser 316k ± 0% 315k ± 0% -0.49% (p=0.016 n=5+5) Reflect 979k ± 0% 972k ± 0% -0.75% (p=0.008 n=5+5) Tar 250k ± 0% 247k ± 1% -0.92% (p=0.008 n=5+5) XML 392k ± 1% 389k ± 0% -0.67% (p=0.008 n=5+5) Change-Id: Idc36186ca9d2f8214b5f7720bbc27b6bb22fdc48 Reviewed-on: https://go-review.googlesource.com/40697 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-14 06:35:53 -07:00
log.Fatalf("invalid symbol version for %q: %d", name, v)
}
if v == 1 {
v = r.localSymVersion
}
s := r.ctxt.Syms.Lookup(name, v)
r.refs = append(r.refs, s)
if s == nil || v != 0 {
return
}
if s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 {
x, err := strconv.ParseUint(s.Name[5:], 16, 64)
if err != nil {
log.Panicf("failed to parse $-symbol %s: %v", s.Name, err)
}
s.Type = SRODATA
s.Attr |= AttrLocal
switch s.Name[:5] {
case "$f32.":
if uint64(uint32(x)) != x {
log.Panicf("$-symbol %s too large: %d", s.Name, x)
}
Adduint32(r.ctxt, s, uint32(x))
case "$f64.", "$i64.":
Adduint64(r.ctxt, s, x)
default:
log.Panicf("unrecognized $-symbol: %s", s.Name)
}
s.Attr.Set(AttrReachable, false)
}
if strings.HasPrefix(s.Name, "runtime.gcbits.") {
s.Attr |= AttrLocal
}
}
func (r *objReader) readInt64() int64 {
uv := uint64(0)
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
log.Fatalf("corrupt input")
}
c, err := r.rd.ReadByte()
if err != nil {
log.Fatalln("error reading input: ", err)
}
uv |= uint64(c&0x7F) << shift
if c&0x80 == 0 {
break
}
}
return int64(uv>>1) ^ (int64(uv<<63) >> 63)
}
func (r *objReader) readInt() int {
n := r.readInt64()
if int64(int(n)) != n {
log.Panicf("%v out of range for int", n)
}
return int(n)
}
func (r *objReader) readInt32() int32 {
n := r.readInt64()
if int64(int32(n)) != n {
log.Panicf("%v out of range for int32", n)
}
return int32(n)
}
func (r *objReader) readInt16() int16 {
n := r.readInt64()
if int64(int16(n)) != n {
log.Panicf("%v out of range for int16", n)
}
return int16(n)
}
func (r *objReader) readUint8() uint8 {
n := r.readInt64()
if int64(uint8(n)) != n {
log.Panicf("%v out of range for uint8", n)
}
return uint8(n)
}
func (r *objReader) readString() string {
n := r.readInt()
if cap(r.rdBuf) < n {
r.rdBuf = make([]byte, 2*n)
}
r.readFull(r.rdBuf[:n])
return string(r.rdBuf[:n])
}
func (r *objReader) readData() []byte {
n := r.readInt()
p := r.data[:n:n]
r.data = r.data[n:]
return p
}
// readSymName reads a symbol name, replacing all "". with pkg.
func (r *objReader) readSymName() string {
pkg := objabi.PathToPrefix(r.lib.Pkg)
n := r.readInt()
if n == 0 {
r.readInt64()
return ""
}
if cap(r.rdBuf) < n {
r.rdBuf = make([]byte, 2*n)
cmd/link: simplify readSymName, taking advantage of bufio.Reader Now that cmd/link uses bufio.Reader, take advantage of it. I find this new version easier to reason about. Reduces allocations by 1.1% when linking a basic HTTP server. Numbers are stable with each round measuring using: rm prof.mem; go tool link -o foo -memprofile=prof.mem -memprofilerate=1 foo.a Before: 65.36MB of 74.53MB total (87.70%) Dropped 157 nodes (cum <= 0.37MB) Showing top 10 nodes out of 39 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 28.81% 28.81% 21.48MB 28.81% cmd/link/internal/ld.Linklookup 16.04MB 21.52% 50.33% 16.04MB 21.52% cmd/link/internal/ld.(*objReader).readSlices 4.61MB 6.19% 56.52% 4.61MB 6.19% cmd/link/internal/ld.(*objReader).readSymName 4.51MB 6.05% 62.57% 6.32MB 8.48% cmd/link/internal/ld.writelines 4.50MB 6.03% 68.60% 4.50MB 6.03% cmd/link/internal/ld.Symgrow 4.02MB 5.39% 73.99% 4.02MB 5.39% cmd/link/internal/ld.linknew 3.98MB 5.34% 79.33% 3.98MB 5.34% cmd/link/internal/ld.setaddrplus 2.96MB 3.97% 83.30% 28.78MB 38.62% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.43% 85.73% 1.81MB 2.43% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.97% 87.70% 1.47MB 1.97% cmd/link/internal/ld.(*objReader).readSym After: 64.66MB of 73.87MB total (87.53%) Dropped 156 nodes (cum <= 0.37MB) Showing top 10 nodes out of 40 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 29.08% 29.08% 21.48MB 29.08% cmd/link/internal/ld.Linklookup 16.04MB 21.71% 50.79% 16.04MB 21.71% cmd/link/internal/ld.(*objReader).readSlices 4.51MB 6.10% 56.90% 6.32MB 8.56% cmd/link/internal/ld.writelines 4.50MB 6.09% 62.99% 4.50MB 6.09% cmd/link/internal/ld.Symgrow 4.02MB 5.44% 68.42% 4.02MB 5.44% cmd/link/internal/ld.linknew 3.98MB 5.38% 73.81% 3.98MB 5.38% cmd/link/internal/ld.setaddrplus 3.90MB 5.28% 79.09% 3.90MB 5.28% cmd/link/internal/ld.(*objReader).readSymName 2.96MB 4.01% 83.09% 28.08MB 38.01% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.45% 85.55% 1.81MB 2.45% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.99% 87.53% 1.47MB 1.99% cmd/link/internal/ld.(*objReader).readSym Also tested locally with asserts that that the calculated length is always correct and thus the adjName buf never reallocates. Change-Id: I19e3e8bfa6a12bcd8b5216f6232f42c122e4f80e Reviewed-on: https://go-review.googlesource.com/21481 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: David Crawshaw <crawshaw@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-03 18:27:17 +00:00
}
origName, err := r.rd.Peek(n)
if err == bufio.ErrBufferFull {
// Long symbol names are rare but exist. One source is type
// symbols for types with long string forms. See #15104.
origName = make([]byte, n)
r.readFull(origName)
} else if err != nil {
log.Fatalf("%s: error reading symbol: %v", r.pn, err)
}
cmd/link: simplify readSymName, taking advantage of bufio.Reader Now that cmd/link uses bufio.Reader, take advantage of it. I find this new version easier to reason about. Reduces allocations by 1.1% when linking a basic HTTP server. Numbers are stable with each round measuring using: rm prof.mem; go tool link -o foo -memprofile=prof.mem -memprofilerate=1 foo.a Before: 65.36MB of 74.53MB total (87.70%) Dropped 157 nodes (cum <= 0.37MB) Showing top 10 nodes out of 39 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 28.81% 28.81% 21.48MB 28.81% cmd/link/internal/ld.Linklookup 16.04MB 21.52% 50.33% 16.04MB 21.52% cmd/link/internal/ld.(*objReader).readSlices 4.61MB 6.19% 56.52% 4.61MB 6.19% cmd/link/internal/ld.(*objReader).readSymName 4.51MB 6.05% 62.57% 6.32MB 8.48% cmd/link/internal/ld.writelines 4.50MB 6.03% 68.60% 4.50MB 6.03% cmd/link/internal/ld.Symgrow 4.02MB 5.39% 73.99% 4.02MB 5.39% cmd/link/internal/ld.linknew 3.98MB 5.34% 79.33% 3.98MB 5.34% cmd/link/internal/ld.setaddrplus 2.96MB 3.97% 83.30% 28.78MB 38.62% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.43% 85.73% 1.81MB 2.43% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.97% 87.70% 1.47MB 1.97% cmd/link/internal/ld.(*objReader).readSym After: 64.66MB of 73.87MB total (87.53%) Dropped 156 nodes (cum <= 0.37MB) Showing top 10 nodes out of 40 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 29.08% 29.08% 21.48MB 29.08% cmd/link/internal/ld.Linklookup 16.04MB 21.71% 50.79% 16.04MB 21.71% cmd/link/internal/ld.(*objReader).readSlices 4.51MB 6.10% 56.90% 6.32MB 8.56% cmd/link/internal/ld.writelines 4.50MB 6.09% 62.99% 4.50MB 6.09% cmd/link/internal/ld.Symgrow 4.02MB 5.44% 68.42% 4.02MB 5.44% cmd/link/internal/ld.linknew 3.98MB 5.38% 73.81% 3.98MB 5.38% cmd/link/internal/ld.setaddrplus 3.90MB 5.28% 79.09% 3.90MB 5.28% cmd/link/internal/ld.(*objReader).readSymName 2.96MB 4.01% 83.09% 28.08MB 38.01% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.45% 85.55% 1.81MB 2.45% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.99% 87.53% 1.47MB 1.99% cmd/link/internal/ld.(*objReader).readSym Also tested locally with asserts that that the calculated length is always correct and thus the adjName buf never reallocates. Change-Id: I19e3e8bfa6a12bcd8b5216f6232f42c122e4f80e Reviewed-on: https://go-review.googlesource.com/21481 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: David Crawshaw <crawshaw@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-03 18:27:17 +00:00
adjName := r.rdBuf[:0]
for {
i := bytes.Index(origName, emptyPkg)
if i == -1 {
cmd/link: simplify readSymName, taking advantage of bufio.Reader Now that cmd/link uses bufio.Reader, take advantage of it. I find this new version easier to reason about. Reduces allocations by 1.1% when linking a basic HTTP server. Numbers are stable with each round measuring using: rm prof.mem; go tool link -o foo -memprofile=prof.mem -memprofilerate=1 foo.a Before: 65.36MB of 74.53MB total (87.70%) Dropped 157 nodes (cum <= 0.37MB) Showing top 10 nodes out of 39 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 28.81% 28.81% 21.48MB 28.81% cmd/link/internal/ld.Linklookup 16.04MB 21.52% 50.33% 16.04MB 21.52% cmd/link/internal/ld.(*objReader).readSlices 4.61MB 6.19% 56.52% 4.61MB 6.19% cmd/link/internal/ld.(*objReader).readSymName 4.51MB 6.05% 62.57% 6.32MB 8.48% cmd/link/internal/ld.writelines 4.50MB 6.03% 68.60% 4.50MB 6.03% cmd/link/internal/ld.Symgrow 4.02MB 5.39% 73.99% 4.02MB 5.39% cmd/link/internal/ld.linknew 3.98MB 5.34% 79.33% 3.98MB 5.34% cmd/link/internal/ld.setaddrplus 2.96MB 3.97% 83.30% 28.78MB 38.62% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.43% 85.73% 1.81MB 2.43% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.97% 87.70% 1.47MB 1.97% cmd/link/internal/ld.(*objReader).readSym After: 64.66MB of 73.87MB total (87.53%) Dropped 156 nodes (cum <= 0.37MB) Showing top 10 nodes out of 40 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 29.08% 29.08% 21.48MB 29.08% cmd/link/internal/ld.Linklookup 16.04MB 21.71% 50.79% 16.04MB 21.71% cmd/link/internal/ld.(*objReader).readSlices 4.51MB 6.10% 56.90% 6.32MB 8.56% cmd/link/internal/ld.writelines 4.50MB 6.09% 62.99% 4.50MB 6.09% cmd/link/internal/ld.Symgrow 4.02MB 5.44% 68.42% 4.02MB 5.44% cmd/link/internal/ld.linknew 3.98MB 5.38% 73.81% 3.98MB 5.38% cmd/link/internal/ld.setaddrplus 3.90MB 5.28% 79.09% 3.90MB 5.28% cmd/link/internal/ld.(*objReader).readSymName 2.96MB 4.01% 83.09% 28.08MB 38.01% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.45% 85.55% 1.81MB 2.45% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.99% 87.53% 1.47MB 1.99% cmd/link/internal/ld.(*objReader).readSym Also tested locally with asserts that that the calculated length is always correct and thus the adjName buf never reallocates. Change-Id: I19e3e8bfa6a12bcd8b5216f6232f42c122e4f80e Reviewed-on: https://go-review.googlesource.com/21481 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: David Crawshaw <crawshaw@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-03 18:27:17 +00:00
s := string(append(adjName, origName...))
// Read past the peeked origName, now that we're done with it,
// using the rfBuf (also no longer used) as the scratch space.
// TODO: use bufio.Reader.Discard if available instead?
if err == nil {
r.readFull(r.rdBuf[:n])
}
r.rdBuf = adjName[:0] // in case 2*n wasn't enough
if Buildmode == BuildmodeShared || *FlagLinkshared {
// These types are included in the symbol
// table when dynamically linking. To keep
// binary size down, we replace the names
// with SHA-1 prefixes.
//
// Keep the type.. prefix, which parts of the
// linker (like the DWARF generator) know means
// the symbol is not decodable.
//
// Leave type.runtime. symbols alone, because
// other parts of the linker manipulates them,
// and also symbols whose names would not be
// shortened by this process.
if len(s) > 14 && strings.HasPrefix(s, "type.") && !strings.HasPrefix(s, "type.runtime.") {
hash := sha1.Sum([]byte(s))
prefix := "type."
if s[5] == '.' {
prefix = "type.."
}
s = prefix + base64.StdEncoding.EncodeToString(hash[:6])
}
}
cmd/link: simplify readSymName, taking advantage of bufio.Reader Now that cmd/link uses bufio.Reader, take advantage of it. I find this new version easier to reason about. Reduces allocations by 1.1% when linking a basic HTTP server. Numbers are stable with each round measuring using: rm prof.mem; go tool link -o foo -memprofile=prof.mem -memprofilerate=1 foo.a Before: 65.36MB of 74.53MB total (87.70%) Dropped 157 nodes (cum <= 0.37MB) Showing top 10 nodes out of 39 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 28.81% 28.81% 21.48MB 28.81% cmd/link/internal/ld.Linklookup 16.04MB 21.52% 50.33% 16.04MB 21.52% cmd/link/internal/ld.(*objReader).readSlices 4.61MB 6.19% 56.52% 4.61MB 6.19% cmd/link/internal/ld.(*objReader).readSymName 4.51MB 6.05% 62.57% 6.32MB 8.48% cmd/link/internal/ld.writelines 4.50MB 6.03% 68.60% 4.50MB 6.03% cmd/link/internal/ld.Symgrow 4.02MB 5.39% 73.99% 4.02MB 5.39% cmd/link/internal/ld.linknew 3.98MB 5.34% 79.33% 3.98MB 5.34% cmd/link/internal/ld.setaddrplus 2.96MB 3.97% 83.30% 28.78MB 38.62% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.43% 85.73% 1.81MB 2.43% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.97% 87.70% 1.47MB 1.97% cmd/link/internal/ld.(*objReader).readSym After: 64.66MB of 73.87MB total (87.53%) Dropped 156 nodes (cum <= 0.37MB) Showing top 10 nodes out of 40 (cum >= 1.47MB) flat flat% sum% cum cum% 21.48MB 29.08% 29.08% 21.48MB 29.08% cmd/link/internal/ld.Linklookup 16.04MB 21.71% 50.79% 16.04MB 21.71% cmd/link/internal/ld.(*objReader).readSlices 4.51MB 6.10% 56.90% 6.32MB 8.56% cmd/link/internal/ld.writelines 4.50MB 6.09% 62.99% 4.50MB 6.09% cmd/link/internal/ld.Symgrow 4.02MB 5.44% 68.42% 4.02MB 5.44% cmd/link/internal/ld.linknew 3.98MB 5.38% 73.81% 3.98MB 5.38% cmd/link/internal/ld.setaddrplus 3.90MB 5.28% 79.09% 3.90MB 5.28% cmd/link/internal/ld.(*objReader).readSymName 2.96MB 4.01% 83.09% 28.08MB 38.01% cmd/link/internal/ld.(*objReader).readRef 1.81MB 2.45% 85.55% 1.81MB 2.45% cmd/link/internal/ld.newcfaoffsetattr 1.47MB 1.99% 87.53% 1.47MB 1.99% cmd/link/internal/ld.(*objReader).readSym Also tested locally with asserts that that the calculated length is always correct and thus the adjName buf never reallocates. Change-Id: I19e3e8bfa6a12bcd8b5216f6232f42c122e4f80e Reviewed-on: https://go-review.googlesource.com/21481 Reviewed-by: David Crawshaw <crawshaw@golang.org> Run-TryBot: David Crawshaw <crawshaw@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-03 18:27:17 +00:00
return s
}
adjName = append(adjName, origName[:i]...)
adjName = append(adjName, pkg...)
adjName = append(adjName, '.')
origName = origName[i+len(emptyPkg):]
}
}
// Reads the index of a symbol reference and resolves it to a symbol
func (r *objReader) readSymIndex() *Symbol {
i := r.readInt()
return r.refs[i]
}