go/src/cmd/internal/obj/link.go

535 lines
16 KiB
Go
Raw Normal View History

// Derived from Inferno utils/6l/l.h and related files.
// https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/l.h
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
package obj
import (
"bufio"
"cmd/internal/dwarf"
"cmd/internal/objabi"
"cmd/internal/src"
"cmd/internal/sys"
"fmt"
)
// An Addr is an argument to an instruction.
// The general forms and their encodings are:
//
// sym±offset(symkind)(reg)(index*scale)
// Memory reference at address &sym(symkind) + offset + reg + index*scale.
// Any of sym(symkind), ±offset, (reg), (index*scale), and *scale can be omitted.
// If (reg) and *scale are both omitted, the resulting expression (index) is parsed as (reg).
// To force a parsing as index*scale, write (index*1).
// Encoding:
// type = TYPE_MEM
// name = symkind (NAME_AUTO, ...) or 0 (NAME_NONE)
// sym = sym
// offset = ±offset
// reg = reg (REG_*)
// index = index (REG_*)
// scale = scale (1, 2, 4, 8)
//
// $<mem>
// Effective address of memory reference <mem>, defined above.
// Encoding: same as memory reference, but type = TYPE_ADDR.
//
// $<±integer value>
// This is a special case of $<mem>, in which only ±offset is present.
// It has a separate type for easy recognition.
// Encoding:
// type = TYPE_CONST
// offset = ±integer value
//
// *<mem>
// Indirect reference through memory reference <mem>, defined above.
// Only used on x86 for CALL/JMP *sym(SB), which calls/jumps to a function
// pointer stored in the data word sym(SB), not a function named sym(SB).
// Encoding: same as above, but type = TYPE_INDIR.
//
// $*$<mem>
// No longer used.
// On machines with actual SB registers, $*$<mem> forced the
// instruction encoding to use a full 32-bit constant, never a
// reference relative to SB.
//
// $<floating point literal>
// Floating point constant value.
// Encoding:
// type = TYPE_FCONST
// val = floating point value
//
// $<string literal, up to 8 chars>
// String literal value (raw bytes used for DATA instruction).
// Encoding:
// type = TYPE_SCONST
// val = string
//
// <register name>
// Any register: integer, floating point, control, segment, and so on.
// If looking for specific register kind, must check type and reg value range.
// Encoding:
// type = TYPE_REG
// reg = reg (REG_*)
//
// x(PC)
// Encoding:
// type = TYPE_BRANCH
// val = Prog* reference OR ELSE offset = target pc (branch takes priority)
//
// $±x-±y
// Final argument to TEXT, specifying local frame size x and argument size y.
// In this form, x and y are integer literals only, not arbitrary expressions.
// This avoids parsing ambiguities due to the use of - as a separator.
// The ± are optional.
// If the final argument to TEXT omits the -±y, the encoding should still
// use TYPE_TEXTSIZE (not TYPE_CONST), with val = int32(ArgsSizeUnknown).
// Encoding:
// type = TYPE_TEXTSIZE
// offset = x
// val = int32(y)
//
// reg<<shift, reg>>shift, reg->shift, reg@>shift
// Shifted register value, for ARM and ARM64.
// In this form, reg must be a register and shift can be a register or an integer constant.
// Encoding:
// type = TYPE_SHIFT
// On ARM:
// offset = (reg&15) | shifttype<<5 | count
// shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
// count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
// On ARM64:
// offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10
// shifttype = 0, 1, 2 for <<, >>, ->
//
// (reg, reg)
// A destination register pair. When used as the last argument of an instruction,
// this form makes clear that both registers are destinations.
// Encoding:
// type = TYPE_REGREG
// reg = first register
// offset = second register
//
// [reg, reg, reg-reg]
// Register list for ARM.
// Encoding:
// type = TYPE_REGLIST
// offset = bit mask of registers in list; R0 is low bit.
//
// reg, reg
// Register pair for ARM.
// TYPE_REGREG2
//
// (reg+reg)
// Register pair for PPC64.
// Encoding:
// type = TYPE_MEM
// reg = first register
// index = second register
// scale = 1
//
type Addr struct {
// Reg is the base register (REG_*). For TYPE_REGREG it is the first
// register of the pair.
Reg int16
// Index is the index register (REG_*) of a memory reference; for the
// PPC64 (reg+reg) form it is the second register.
Index int16
Scale int16 // Scale factor (1, 2, 4, 8) applied to Index. Sometimes holds a register.
// Type selects which operand form this Addr encodes (TYPE_*); it
// determines which of the other fields are meaningful.
Type AddrType
// Name is the symbol kind (NAME_*) qualifying Sym, or NAME_NONE.
Name AddrName
// Class is not covered by the encodings documented above.
// NOTE(review): presumably scratch space for the back ends — confirm.
Class int8
// Offset is the ±offset of a memory reference or the value of an integer
// constant. TYPE_TEXTSIZE, TYPE_SHIFT, TYPE_REGREG and TYPE_REGLIST reuse
// it for the frame size, packed shift description, second register and
// register bit mask respectively.
Offset int64
// Sym is the symbol referenced by the operand, if any.
Sym *LSym
// argument value:
// for TYPE_SCONST, a string
// for TYPE_FCONST, a float64
// for TYPE_BRANCH, a *Prog (optional)
// for TYPE_TEXTSIZE, an int32 (optional)
Val interface{}
}
// AddrName is the symbol kind ("symkind") of an operand, stored in Addr.Name
// and Auto.Name.
type AddrName int8
const (
// NAME_NONE is the zero value: no symbol kind is present.
NAME_NONE AddrName = iota
NAME_EXTERN
NAME_STATIC
NAME_AUTO
NAME_PARAM
// A reference to name@GOT(SB) is a reference to the entry in the global offset
// table for 'name'.
NAME_GOTREF
)
// AddrType identifies the operand form encoded by an Addr (see the Addr
// documentation for the field encoding used by each form).
type AddrType uint8
const (
TYPE_NONE AddrType = iota // zero value: no operand
TYPE_BRANCH // x(PC): branch target
TYPE_TEXTSIZE // $±x-±y: frame and argument size, final argument to TEXT
TYPE_MEM // sym±offset(symkind)(reg)(index*scale): memory reference
TYPE_CONST // $<±integer value>
TYPE_FCONST // $<floating point literal>
TYPE_SCONST // $<string literal, up to 8 chars>
TYPE_REG // <register name>
TYPE_ADDR // $<mem>: effective address of a memory reference
TYPE_SHIFT // shifted register value, for ARM and ARM64
TYPE_REGREG // (reg, reg): destination register pair
TYPE_REGREG2 // reg, reg: register pair for ARM
TYPE_INDIR // *<mem>: indirect reference through a memory reference
TYPE_REGLIST // [reg, reg, reg-reg]: register list for ARM
)
// Prog describes a single machine instruction.
//
// The general instruction form is:
//
// As.Scond From, Reg, From3, To, RegTo2
//
// where As is an opcode and the others are arguments:
// From, Reg, From3 are sources, and To, RegTo2 are destinations.
// Usually, not all arguments are present.
// For example, MOVL R1, R2 encodes using only As=MOVL, From=R1, To=R2.
// The Scond field holds additional condition bits for systems (like arm)
// that have generalized conditional execution.
//
// Jump instructions use the Pcond field to point to the target instruction,
// which must be in the same linked list as the jump instruction.
//
// The Progs for a given function are arranged in a list linked through the Link field.
//
// Each Prog is charged to a specific source line in the debug information,
// specified by Pos.Line().
// Every Prog has a Ctxt field that defines its context.
cmd/compile: teach assemblers to accept a Prog allocator The existing bulk Prog allocator is not concurrency-safe. To allow for concurrency-safe bulk allocation of Progs, I want to move Prog allocation and caching upstream, to the clients of cmd/internal/obj. This is a preliminary enabling refactoring. After this CL, instead of calling Ctxt.NewProg throughout the assemblers, we thread through a newprog function that returns a new Prog. That function is set up to be Ctxt.NewProg, so there are no real changes in this CL; this CL only establishes the plumbing. Passes toolstash-check -all. Negligible compiler performance impact. Updates #15756 name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.574 n=49+47) Unicode 90.1ms ± 5% 89.9ms ± 4% ~ (p=0.417 n=50+49) GoTypes 585ms ± 4% 584ms ± 3% ~ (p=0.466 n=49+49) SSA 6.50s ± 3% 6.52s ± 2% ~ (p=0.251 n=49+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.673 n=49+50) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.810 n=48+49) Reflect 372ms ± 4% 372ms ± 5% ~ (p=0.778 n=49+50) Tar 113ms ± 5% 111ms ± 4% -0.98% (p=0.016 n=50+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.483 n=47+49) [Geo mean] 285ms 285ms -0.17% name old user-ns/op new user-ns/op delta Template 253M ± 8% 254M ± 9% ~ (p=0.899 n=50+50) Unicode 106M ± 9% 106M ±11% ~ (p=0.642 n=50+50) GoTypes 736M ± 4% 740M ± 4% ~ (p=0.121 n=50+49) SSA 8.82G ± 3% 8.88G ± 2% +0.65% (p=0.006 n=49+48) Flate 147M ± 4% 147M ± 5% ~ (p=0.844 n=47+48) GoParser 179M ± 4% 178M ± 6% ~ (p=0.785 n=50+50) Reflect 443M ± 6% 441M ± 5% ~ (p=0.850 n=48+47) Tar 126M ± 5% 126M ± 5% ~ (p=0.734 n=50+50) XML 244M ± 5% 244M ± 5% ~ (p=0.594 n=49+50) [Geo mean] 341M 341M +0.11% Change-Id: Ice962f61eb3a524c2db00a166cb582c22caa7d68 Reviewed-on: https://go-review.googlesource.com/39633 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-04 14:31:55 -07:00
// For performance reasons, Progs usually are usually bulk allocated, cached, and reused;
// those bulk allocators should always be used, rather than new(Prog).
//
// The other fields not yet mentioned are for use by the back ends and should
// be left zeroed by creators of Prog lists.
type Prog struct {
Ctxt *Link // linker context
Link *Prog // next Prog in linked list
From Addr // first source operand
From3 *Addr // third source operand (second is Reg below)
To Addr // destination operand (second is RegTo2 below)
Pcond *Prog // target of conditional jump
Forwd *Prog // for x86 back end
Rel *Prog // for x86, arm back ends
Pc int64 // for back ends or assembler: virtual or actual program counter, depending on phase
Pos src.XPos // source position of this instruction
Spadj int32 // effect of instruction on stack pointer (increment or decrement amount)
As As // assembler opcode
Reg int16 // 2nd source operand
RegTo2 int16 // 2nd destination operand
Mark uint16 // bitmask of arch-specific items
Optab uint16 // arch-specific opcode index
Scond uint8 // condition bits for conditional instruction (e.g., on ARM)
Back uint8 // for x86 back end: backwards branch state
Ft uint8 // for x86 back end: type index of Prog.From
Tt uint8 // for x86 back end: type index of Prog.To
Isize uint8 // for x86 back end: size of the instruction in bytes
}
// From3Type reports the operand type of the third source operand.
// A Prog that has no third source operand (p.From3 == nil) yields TYPE_NONE,
// so callers can inspect the type without a nil check of their own.
func (p *Prog) From3Type() AddrType {
	if third := p.From3; third != nil {
		return third.Type
	}
	return TYPE_NONE
}
// An As denotes an assembler opcode.
// There are some portable opcodes, declared here in package obj,
// that are common to all architectures.
// However, the majority of opcodes are arch-specific
// and are declared in their respective architecture's subpackage.
type As int16
// These are the portable opcodes.
// They are numbered consecutively from zero by iota.
const (
AXXX As = iota // zero value of As
ACALL
ADUFFCOPY
ADUFFZERO
AEND
AFUNCDATA
AJMP
ANOP
APCDATA
ARET
ATEXT
AUNDEF
// A_ARCHSPECIFIC is one past the last portable opcode; it is the offset
// within an architecture's opcode subspace at which that architecture's
// own opcodes begin.
A_ARCHSPECIFIC
)
// Each architecture is allotted a distinct subspace of opcode values
// for declaring its arch-specific opcodes.
// Within this subspace, the first arch-specific opcode should be
// at offset A_ARCHSPECIFIC.
//
// Subspaces are aligned to a power of two so opcodes can be masked
// with AMask and used as compact array indices.
const (
// The ABase* values are the starting points of the per-architecture
// opcode subspaces. They are consecutive multiples of 1<<10:
// ABase386 = 1<<10, ABaseARM = 2<<10, and so on up to ABaseS390X = 7<<10.
ABase386 = (1 + iota) << 10
ABaseARM
ABaseAMD64
ABasePPC64
ABaseARM64
ABaseMIPS
ABaseS390X
// NOTE(review): the shift here is written separately from the one in the
// ABase* expression above; the two must agree for AMask to strip exactly
// the base bits.
AllowedOpCodes = 1 << 10 // The number of opcodes available for any given architecture.
AMask = AllowedOpCodes - 1 // AND with this to use the opcode as an array index.
)
// An LSym is the sort of symbol that is written to an object file.
type LSym struct {
// Name is the symbol's name; it is what String returns.
Name string
// Type is the symbol's kind.
Type objabi.SymKind
// Version distinguishes symbols that share a name; Link keeps separate
// name tables for version 0 and version 1.
Version int16
// Attribute is embedded, so its flag accessors (NoSplit, Leaf, ...) and
// Set are available directly on an LSym.
Attribute
RefIdx int // Index of this symbol in the symbol reference list.
// NOTE(review): Size, Gotype and P are not described elsewhere in this
// file; presumably the symbol's size in bytes, the symbol describing its
// Go type, and its data bytes — confirm.
Size int64
Gotype *LSym
P []byte
// R holds the symbol's relocations.
R []Reloc
// Func holds the extra fields used for STEXT symbols.
Func *FuncInfo
}
// A FuncInfo contains extra fields for STEXT symbols.
type FuncInfo struct {
Args int32
Locals int32
Text *Prog
Autom []*Auto
Pcln Pcln
dwarfSym *LSym
cmd/internal/obj: rework gclocals handling The compiler handled gcargs and gclocals LSyms unusually. It generated placeholder symbols (makefuncdatasym), filled them in, and then renamed them for content-addressability. This is an important binary size optimization; the same locals information occurs over and over. This CL continues to treat these LSyms unusually, but in a slightly more explicit way, and importantly for concurrent compilation, in a way that does not require concurrent modification of Ctxt.Hash. Instead of creating gcargs and gclocals in the usual way, by creating a types.Sym and then an obj.LSym, we add them directly to obj.FuncInfo, initialize them in obj.InitTextSym, and deduplicate and add them to ctxt.Data at the end. Then the backend's job is simply to fill them in and rename them appropriately. Updates #15756 name old alloc/op new alloc/op delta Template 38.8MB ± 0% 38.7MB ± 0% -0.22% (p=0.016 n=5+5) Unicode 29.8MB ± 0% 29.8MB ± 0% ~ (p=0.690 n=5+5) GoTypes 113MB ± 0% 113MB ± 0% -0.24% (p=0.008 n=5+5) SSA 1.25GB ± 0% 1.24GB ± 0% -0.39% (p=0.008 n=5+5) Flate 25.3MB ± 0% 25.2MB ± 0% -0.43% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 31.7MB ± 0% -0.22% (p=0.008 n=5+5) Reflect 78.2MB ± 0% 77.6MB ± 0% -0.80% (p=0.008 n=5+5) Tar 26.6MB ± 0% 26.3MB ± 0% -0.85% (p=0.008 n=5+5) XML 42.4MB ± 0% 41.9MB ± 0% -1.04% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Template 378k ± 0% 377k ± 1% ~ (p=0.151 n=5+5) Unicode 321k ± 1% 321k ± 0% ~ (p=0.841 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% -0.47% (p=0.016 n=5+5) SSA 9.71M ± 0% 9.67M ± 0% -0.33% (p=0.008 n=5+5) Flate 233k ± 1% 232k ± 1% ~ (p=0.151 n=5+5) GoParser 316k ± 0% 315k ± 0% -0.49% (p=0.016 n=5+5) Reflect 979k ± 0% 972k ± 0% -0.75% (p=0.008 n=5+5) Tar 250k ± 0% 247k ± 1% -0.92% (p=0.008 n=5+5) XML 392k ± 1% 389k ± 0% -0.67% (p=0.008 n=5+5) Change-Id: Idc36186ca9d2f8214b5f7720bbc27b6bb22fdc48 Reviewed-on: https://go-review.googlesource.com/40697 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> 
TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-14 06:35:53 -07:00
GCArgs LSym
GCLocals LSym
}
// Attribute is a set of symbol attributes.
// Each Attr* constant occupies one bit; an Attribute value is the bitwise OR
// of the attributes that are set.
type Attribute int16

const (
	AttrDuplicateOK Attribute = 1 << iota
	AttrCFunc
	AttrNoSplit
	AttrLeaf
	AttrWrapper
	AttrNeedCtxt
	AttrNoFrame
	AttrSeenGlobl
	AttrOnList

	// MakeTypelink means that the type should have an entry in the typelink table.
	AttrMakeTypelink

	// ReflectMethod means the function may call reflect.Type.Method or
	// reflect.Type.MethodByName. Matching is imprecise (as reflect.Type
	// can be used through a custom interface), so ReflectMethod may be
	// set in some cases when the reflect package is not called.
	//
	// Used by the linker to determine what methods can be pruned.
	AttrReflectMethod

	// Local means make the symbol local even when compiling Go code to reference Go
	// symbols in other shared libraries, as in this mode symbols are global by
	// default. "local" here means in the sense of the dynamic linker, i.e. not
	// visible outside of the module (shared library or executable) that contains its
	// definition. (When not compiling to support Go shared libraries, all symbols are
	// local in this sense unless there is a cgo_export_* directive).
	AttrLocal
)

// The accessors below each report whether the corresponding Attr* bit is set.

func (a Attribute) DuplicateOK() bool   { return a&AttrDuplicateOK != 0 }
func (a Attribute) MakeTypelink() bool  { return a&AttrMakeTypelink != 0 }
func (a Attribute) CFunc() bool         { return a&AttrCFunc != 0 }
func (a Attribute) NoSplit() bool       { return a&AttrNoSplit != 0 }
func (a Attribute) Leaf() bool          { return a&AttrLeaf != 0 }
func (a Attribute) SeenGlobl() bool     { return a&AttrSeenGlobl != 0 }
func (a Attribute) OnList() bool        { return a&AttrOnList != 0 }
func (a Attribute) ReflectMethod() bool { return a&AttrReflectMethod != 0 }
func (a Attribute) Local() bool         { return a&AttrLocal != 0 }
func (a Attribute) Wrapper() bool       { return a&AttrWrapper != 0 }
func (a Attribute) NeedCtxt() bool      { return a&AttrNeedCtxt != 0 }
func (a Attribute) NoFrame() bool       { return a&AttrNoFrame != 0 }

// Set turns the bits in flag on (value == true) or off (value == false),
// leaving all other bits of *a untouched.
func (a *Attribute) Set(flag Attribute, value bool) {
	if value {
		*a |= flag
	} else {
		*a &^= flag
	}
}

// textAttrStrings maps each attribute bit to the name TextAttrString prints
// for it. An empty name means the bit is recognized but never printed.
// The order of the entries is the order in which names appear in the output.
var textAttrStrings = [...]struct {
	bit Attribute
	s   string
}{
	{bit: AttrDuplicateOK, s: "DUPOK"},
	{bit: AttrMakeTypelink, s: ""},
	{bit: AttrCFunc, s: "CFUNC"},
	{bit: AttrNoSplit, s: "NOSPLIT"},
	{bit: AttrLeaf, s: "LEAF"},
	{bit: AttrSeenGlobl, s: ""},
	{bit: AttrOnList, s: ""},
	{bit: AttrReflectMethod, s: "REFLECTMETHOD"},
	{bit: AttrLocal, s: "LOCAL"},
	{bit: AttrWrapper, s: "WRAPPER"},
	{bit: AttrNeedCtxt, s: "NEEDCTXT"},
	{bit: AttrNoFrame, s: "NOFRAME"},
}

// TextAttrString formats a for printing as part of a TEXT prog.
// The result is the "|"-separated list of the printable names of the set
// bits, in textAttrStrings order. Any bits that are not listed in
// textAttrStrings are reported together as a trailing UnknownAttribute(n).
// It returns the empty string when nothing printable is set.
func (a Attribute) TextAttrString() string {
	var s string
	for _, x := range textAttrStrings {
		if a&x.bit != 0 {
			if x.s != "" {
				s += x.s + "|"
			}
			// Clear the bit so that only unrecognized bits remain afterwards.
			a &^= x.bit
		}
	}
	if a != 0 {
		s += fmt.Sprintf("UnknownAttribute(%d)|", a)
	}
	// Chop off trailing |, if present.
	if len(s) > 0 {
		s = s[:len(s)-1]
	}
	return s
}
// String returns the symbol's name.
//
// The compiler needs LSym to satisfy fmt.Stringer, because it stores
// an LSym in ssa.ExternSymbol.
func (s *LSym) String() string {
return s.Name
}
type Pcln struct {
Pcsp Pcdata
Pcfile Pcdata
Pcline Pcdata
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. 
For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
Pcinline Pcdata
Pcdata []Pcdata
Funcdata []*LSym
Funcdataoff []int64
File []string
Lastfile string
Lastindex int
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. 
For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
InlTree InlTree // per-function inlining tree extracted from the global tree
}
// Reloc is one relocation entry of an LSym (see LSym.R).
// NOTE(review): the field meanings below are inferred from names only —
// confirm against the code that creates and consumes relocations.
type Reloc struct {
Off int32 // presumably the offset within the symbol's data where the relocation applies
Siz uint8 // presumably the size in bytes of the relocated field
Type objabi.RelocType // kind of relocation
Add int64 // presumably the addend
Sym *LSym // symbol the relocation refers to
}
// Auto is one entry of FuncInfo.Autom.
// NOTE(review): presumably describes a stack-allocated variable or parameter
// of the function — confirm.
type Auto struct {
Asym *LSym // symbol for the entry
Aoffset int32 // NOTE(review): presumably its frame offset — confirm
Name AddrName // symbol kind (NAME_*)
Gotype *LSym // NOTE(review): presumably the symbol describing its Go type — confirm
}
// Pcdata is a single table stored in Pcln.
type Pcdata struct {
// P is the table's raw bytes.
// NOTE(review): presumably the encoded PC-value stream — confirm.
P []byte
}
// Link holds the context for writing object code from a compiler
// to be linker input or for reading that input into the linker.
type Link struct {
Headtype objabi.HeadType
Arch *LinkArch
Debugasm bool
Debugvlog bool
Debugpcln string
Flag_shared bool
Flag_dynlink bool
Flag_optimize bool
Bso *bufio.Writer
Pathname string
cmd/internal/obj: split Link.hash into version 0 and 1 Though LSym.Version is an int, it can only have the value 0 or 1. Using that, split Link.hash into two maps, one for version 0 (which is far more common) and one for version 1. This lets use just the name for lookups, which is both faster and more compact. This matters because Link.hash map lookups are frequent, and will be contended once the backend is concurrent. name old time/op new time/op delta Template 194ms ± 3% 192ms ± 5% -1.46% (p=0.000 n=47+49) Unicode 84.5ms ± 3% 83.8ms ± 3% -0.81% (p=0.011 n=50+49) GoTypes 543ms ± 2% 545ms ± 4% ~ (p=0.566 n=46+49) Compiler 2.48s ± 2% 2.48s ± 3% ~ (p=0.706 n=47+50) SSA 5.94s ± 3% 5.98s ± 2% +0.55% (p=0.040 n=49+50) Flate 119ms ± 6% 119ms ± 4% ~ (p=0.681 n=48+47) GoParser 145ms ± 4% 145ms ± 3% ~ (p=0.662 n=47+49) Reflect 348ms ± 3% 344ms ± 3% -1.17% (p=0.000 n=47+47) Tar 105ms ± 4% 104ms ± 3% ~ (p=0.155 n=50+47) XML 197ms ± 2% 197ms ± 3% ~ (p=0.666 n=49+49) [Geo mean] 332ms 331ms -0.37% name old user-time/op new user-time/op delta Template 230ms ±10% 226ms ±10% -1.85% (p=0.041 n=50+50) Unicode 104ms ± 6% 103ms ± 5% ~ (p=0.076 n=49+49) GoTypes 707ms ± 4% 705ms ± 5% ~ (p=0.521 n=50+50) Compiler 3.30s ± 3% 3.33s ± 4% +0.76% (p=0.003 n=50+49) SSA 8.17s ± 4% 8.23s ± 3% +0.66% (p=0.030 n=50+49) Flate 139ms ± 6% 138ms ± 8% ~ (p=0.184 n=49+48) GoParser 174ms ± 5% 172ms ± 6% ~ (p=0.107 n=48+49) Reflect 431ms ± 8% 420ms ± 5% -2.57% (p=0.000 n=50+46) Tar 119ms ± 6% 118ms ± 7% -0.95% (p=0.033 n=50+49) XML 236ms ± 4% 236ms ± 4% ~ (p=0.935 n=50+48) [Geo mean] 410ms 407ms -0.67% name old alloc/op new alloc/op delta Template 38.7MB ± 0% 38.6MB ± 0% -0.29% (p=0.008 n=5+5) Unicode 29.8MB ± 0% 29.7MB ± 0% -0.24% (p=0.008 n=5+5) GoTypes 113MB ± 0% 113MB ± 0% -0.29% (p=0.008 n=5+5) Compiler 462MB ± 0% 462MB ± 0% -0.12% (p=0.008 n=5+5) SSA 1.27GB ± 0% 1.27GB ± 0% -0.05% (p=0.008 n=5+5) Flate 25.2MB ± 0% 25.1MB ± 0% -0.37% (p=0.008 n=5+5) GoParser 31.7MB ± 0% 31.6MB ± 0% ~ (p=0.056 n=5+5) 
Reflect 77.5MB ± 0% 77.2MB ± 0% -0.38% (p=0.008 n=5+5) Tar 26.4MB ± 0% 26.3MB ± 0% ~ (p=0.151 n=5+5) XML 41.9MB ± 0% 41.9MB ± 0% -0.20% (p=0.032 n=5+5) [Geo mean] 74.5MB 74.3MB -0.23% name old allocs/op new allocs/op delta Template 378k ± 1% 377k ± 1% ~ (p=0.690 n=5+5) Unicode 321k ± 0% 322k ± 0% ~ (p=0.595 n=5+5) GoTypes 1.14M ± 0% 1.14M ± 0% ~ (p=0.310 n=5+5) Compiler 4.25M ± 0% 4.25M ± 0% ~ (p=0.151 n=5+5) SSA 9.84M ± 0% 9.84M ± 0% ~ (p=0.841 n=5+5) Flate 232k ± 1% 232k ± 0% ~ (p=0.690 n=5+5) GoParser 315k ± 1% 315k ± 1% ~ (p=0.841 n=5+5) Reflect 970k ± 0% 970k ± 0% ~ (p=0.841 n=5+5) Tar 248k ± 0% 248k ± 1% ~ (p=0.841 n=5+5) XML 389k ± 0% 389k ± 0% ~ (p=1.000 n=5+5) [Geo mean] 724k 724k +0.01% Updates #15756 Change-Id: I2646332e89f0444ca9d5a41d7172537d904ed636 Reviewed-on: https://go-review.googlesource.com/41050 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2017-04-17 13:52:01 -07:00
hash map[string]*LSym // name -> sym mapping for version == 0
vhash map[string]*LSym // name -> sym mapping for version == 1
2016-12-15 17:17:01 -08:00
PosTable src.PosTable
cmd/compile,link: generate PC-value tables with inlining information In order to generate accurate tracebacks, the runtime needs to know the inlined call stack for a given PC. This creates two tables per function for this purpose. The first table is the inlining tree (stored in the function's funcdata), which has a node containing the file, line, and function name for every inlined call. The second table is a PC-value table that maps each PC to a node in the inlining tree (or -1 if the PC is not the result of inlining). To give the appearance that inlining hasn't happened, the runtime also needs the original source position information of inlined AST nodes. Previously the compiler plastered over the line numbers of inlined AST nodes with the line number of the call. This meant that the PC-line table mapped each PC to line number of the outermost call in its inlined call stack, with no way to access the innermost line number. Now the compiler retains line numbers of inlined AST nodes and writes the innermost source position information to the PC-line and PC-file tables. Some tools and tests expect to see outermost line numbers, so we provide the OutermostLine function for displaying line info. To keep track of the inlined call stack for an AST node, we extend the src.PosBase type with an index into a global inlining tree. Every time the compiler inlines a call, it creates a node in the global inlining tree for the call, and writes its index to the PosBase of every inlined AST node. The parent of this node is the inlining tree index of the call. -1 signifies no parent. For each function, the compiler creates a local inlining tree and a PC-value table mapping each PC to an index in the local tree. These are written to an object file, which is read by the linker. The linker re-encodes these tables compactly by deduplicating function names and file names. This change increases the size of binaries by 4-5%. 
For example, this is how the go1 benchmark binary is impacted by this change: section old bytes new bytes delta .text 3.49M ± 0% 3.49M ± 0% +0.06% .rodata 1.12M ± 0% 1.21M ± 0% +8.21% .gopclntab 1.50M ± 0% 1.68M ± 0% +11.89% .debug_line 338k ± 0% 435k ± 0% +28.78% Total 9.21M ± 0% 9.58M ± 0% +4.01% Updates #19348. Change-Id: Ic4f180c3b516018138236b0c35e0218270d957d3 Reviewed-on: https://go-review.googlesource.com/37231 Run-TryBot: David Lazar <lazard@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
2017-02-17 12:28:05 -05:00
InlTree InlTree // global inlining tree used by gc/inl.go
Imports []string
DiagFunc func(string, ...interface{})
DebugInfo func(fn *LSym, curfn interface{}) []*dwarf.Var // if non-nil, curfn is a *gc.Node
Errors int
Framepointer_enabled bool
// state for writing objects
Text []*LSym
Data []*LSym
}
// Diag counts one more error on ctxt and then hands the format string and
// its arguments to the DiagFunc callback stored on ctxt.
func (ctxt *Link) Diag(format string, args ...interface{}) {
	ctxt.Errors++
	report := ctxt.DiagFunc
	report(format, args...)
}
// Logf formats a message into ctxt.Bso and then flushes that writer.
// Errors from the write and the flush are not reported.
func (ctxt *Link) Logf(format string, args ...interface{}) {
	out := ctxt.Bso
	fmt.Fprintf(out, format, args...)
	out.Flush()
}
// FixedFrameSize returns the smallest possible offset from the hardware
// stack pointer to a local variable on the stack.
//
// Architectures that use a link register save its value on the stack in the
// function prologue, so there is always at least one pointer between the
// hardware stack pointer and the local variable area. AMD64 and I386 have no
// such fixed area and report 0.
func (ctxt *Link) FixedFrameSize() int64 {
	arch := ctxt.Arch
	if arch.Family == sys.AMD64 || arch.Family == sys.I386 {
		return 0
	}

	// Number of pointer-sized words reserved at the bottom of the frame.
	words := 1
	if arch.Family == sys.PPC64 {
		// PIC code on ppc64le requires 32 bytes of stack, and it's easier to
		// just use that much stack always on ppc64x.
		words = 4
	}
	return int64(words * arch.PtrSize)
}
// LinkArch is the definition of a single architecture.
//
// It embeds the generic architecture description from cmd/internal/sys and
// adds a set of per-architecture function hooks that operate on a Link.
type LinkArch struct {
// Embedded generic architecture description; its fields are promoted onto
// LinkArch.
*sys.Arch
// Init receives the Link; presumably a per-architecture initialization
// hook. TODO confirm against the architecture back ends.
Init func(*Link)
// NOTE(review): the next two lines look like commit-message and timestamp
// residue from a blame view rather than Go source; they are left untouched.
cmd/compile: teach assemblers to accept a Prog allocator The existing bulk Prog allocator is not concurrency-safe. To allow for concurrency-safe bulk allocation of Progs, I want to move Prog allocation and caching upstream, to the clients of cmd/internal/obj. This is a preliminary enabling refactoring. After this CL, instead of calling Ctxt.NewProg throughout the assemblers, we thread through a newprog function that returns a new Prog. That function is set up to be Ctxt.NewProg, so there are no real changes in this CL; this CL only establishes the plumbing. Passes toolstash-check -all. Negligible compiler performance impact. Updates #15756 name old time/op new time/op delta Template 213ms ± 3% 214ms ± 4% ~ (p=0.574 n=49+47) Unicode 90.1ms ± 5% 89.9ms ± 4% ~ (p=0.417 n=50+49) GoTypes 585ms ± 4% 584ms ± 3% ~ (p=0.466 n=49+49) SSA 6.50s ± 3% 6.52s ± 2% ~ (p=0.251 n=49+49) Flate 128ms ± 4% 128ms ± 4% ~ (p=0.673 n=49+50) GoParser 152ms ± 3% 152ms ± 3% ~ (p=0.810 n=48+49) Reflect 372ms ± 4% 372ms ± 5% ~ (p=0.778 n=49+50) Tar 113ms ± 5% 111ms ± 4% -0.98% (p=0.016 n=50+49) XML 208ms ± 3% 208ms ± 2% ~ (p=0.483 n=47+49) [Geo mean] 285ms 285ms -0.17% name old user-ns/op new user-ns/op delta Template 253M ± 8% 254M ± 9% ~ (p=0.899 n=50+50) Unicode 106M ± 9% 106M ±11% ~ (p=0.642 n=50+50) GoTypes 736M ± 4% 740M ± 4% ~ (p=0.121 n=50+49) SSA 8.82G ± 3% 8.88G ± 2% +0.65% (p=0.006 n=49+48) Flate 147M ± 4% 147M ± 5% ~ (p=0.844 n=47+48) GoParser 179M ± 4% 178M ± 6% ~ (p=0.785 n=50+50) Reflect 443M ± 6% 441M ± 5% ~ (p=0.850 n=48+47) Tar 126M ± 5% 126M ± 5% ~ (p=0.734 n=50+50) XML 244M ± 5% 244M ± 5% ~ (p=0.594 n=49+50) [Geo mean] 341M 341M +0.11% Change-Id: Ice962f61eb3a524c2db00a166cb582c22caa7d68 Reviewed-on: https://go-review.googlesource.com/39633 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2017-04-04 14:31:55 -07:00
// Preprocess, Assemble and Progedit are per-architecture hooks. Each takes
// the Link plus a ProgAlloc; Preprocess and Assemble work on a symbol
// (*LSym), Progedit on a single *Prog. The ProgAlloc argument is presumably
// the function the hook uses to obtain new Progs; verify against ProgAlloc's
// definition.
Preprocess func(*Link, *LSym, ProgAlloc)
Assemble func(*Link, *LSym, ProgAlloc)
Progedit func(*Link, *Prog, ProgAlloc)
UnaryDst map[As]bool // Instruction takes one operand, a destination.
}