mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/link: use a two-pass approach for trampoline insertion
Currently in the linker, trampoline insertion uses a one-pass approach, where it assigns addresses for each function and inserts trampolines on the fly. For this to work without emitting too many unnecessary trampolines, the functions need to be laid out in dependency order, so that a direct call's target is always at a known address (or known to be not too far). This mostly works, but there are a few exceptions: - linkname can break the dependency tree and cause cycles. - in internal linking mode, on some platforms, some calls are turned into calls via PLT, but the PLT stubs are inserted rather late. Also, this is expensive in that it has to investigate all CALL relocations. This CL changes it to use a two-pass approach. The first pass just assigns addresses without inserting any trampolines, assuming the program is not too big. If this succeeds, no extra work needs to be done. If this fails, start over and insert trampolines for too-far targets as well as targets with unknown addresses. This should make it faster for small programs (most cases) and generate fewer conservative trampolines. Change-Id: Ib13e01f38ec6dfbef1cd446b06da33ee17bded5d Reviewed-on: https://go-review.googlesource.com/c/go/+/314450 Trust: Cherry Zhang <cherryyz@google.com> Reviewed-by: Than McIntosh <thanm@google.com>
This commit is contained in:
parent
d80da19fc9
commit
18852e8372
4 changed files with 103 additions and 44 deletions
|
|
@ -45,6 +45,7 @@ func Init() (*sys.Arch, ld.Arch) {
|
||||||
Minalign: minAlign,
|
Minalign: minAlign,
|
||||||
Dwarfregsp: dwarfRegSP,
|
Dwarfregsp: dwarfRegSP,
|
||||||
Dwarfreglr: dwarfRegLR,
|
Dwarfreglr: dwarfRegLR,
|
||||||
|
TrampLimit: 0x1c00000, // 24-bit signed offset * 4, leave room for PLT etc.
|
||||||
|
|
||||||
Plan9Magic: 0x647,
|
Plan9Magic: 0x647,
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,6 @@ func trampoline(ctxt *Link, s loader.Sym) {
|
||||||
|
|
||||||
thearch.Trampoline(ctxt, ldr, ri, rs, s)
|
thearch.Trampoline(ctxt, ldr, ri, rs, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FoldSubSymbolOffset computes the offset of symbol s to its top-level outer
|
// FoldSubSymbolOffset computes the offset of symbol s to its top-level outer
|
||||||
|
|
@ -2203,12 +2202,60 @@ func (ctxt *Link) textaddress() {
|
||||||
ctxt.Textp[0] = text
|
ctxt.Textp[0] = text
|
||||||
}
|
}
|
||||||
|
|
||||||
va := uint64(Rnd(*FlagTextAddr, int64(Funcalign)))
|
start := uint64(Rnd(*FlagTextAddr, int64(Funcalign)))
|
||||||
|
va := start
|
||||||
n := 1
|
n := 1
|
||||||
sect.Vaddr = va
|
sect.Vaddr = va
|
||||||
|
|
||||||
|
limit := thearch.TrampLimit
|
||||||
|
if limit == 0 {
|
||||||
|
limit = 1 << 63 // unlimited
|
||||||
|
}
|
||||||
|
if *FlagDebugTextSize != 0 {
|
||||||
|
limit = uint64(*FlagDebugTextSize)
|
||||||
|
}
|
||||||
|
if *FlagDebugTramp > 1 {
|
||||||
|
limit = 1 // debug mode, force generating trampolines for everything
|
||||||
|
}
|
||||||
|
|
||||||
|
if ctxt.IsAIX() && ctxt.IsExternal() {
|
||||||
|
// On AIX, normally we won't generate direct calls to external symbols,
|
||||||
|
// except in one test, cmd/go/testdata/script/link_syso_issue33139.txt.
|
||||||
|
// That test doesn't make much sense, and I'm not sure it ever works.
|
||||||
|
// Just generate trampoline for now (which will turn a direct call to
|
||||||
|
// an indirect call, which at least builds).
|
||||||
|
limit = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// First pass: assign addresses assuming the program is small and
|
||||||
|
// don't generate trampolines.
|
||||||
|
big := false
|
||||||
|
for _, s := range ctxt.Textp {
|
||||||
|
sect, n, va = assignAddress(ctxt, sect, n, s, va, false, big)
|
||||||
|
if va-start >= limit {
|
||||||
|
big = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second pass: only if it is too big, insert trampolines for too-far
|
||||||
|
// jumps and targets with unknown addresses.
|
||||||
|
if big {
|
||||||
|
// reset addresses
|
||||||
|
for _, s := range ctxt.Textp {
|
||||||
|
if ldr.OuterSym(s) != 0 || s == text {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
oldv := ldr.SymValue(s)
|
||||||
|
for sub := s; sub != 0; sub = ldr.SubSym(sub) {
|
||||||
|
ldr.SetSymValue(sub, ldr.SymValue(sub)-oldv)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
va = start
|
||||||
|
|
||||||
ntramps := 0
|
ntramps := 0
|
||||||
for _, s := range ctxt.Textp {
|
for _, s := range ctxt.Textp {
|
||||||
sect, n, va = assignAddress(ctxt, sect, n, s, va, false)
|
sect, n, va = assignAddress(ctxt, sect, n, s, va, false, big)
|
||||||
|
|
||||||
trampoline(ctxt, s) // resolve jumps, may add trampolines if jump too far
|
trampoline(ctxt, s) // resolve jumps, may add trampolines if jump too far
|
||||||
|
|
||||||
|
|
@ -2219,19 +2266,10 @@ func (ctxt *Link) textaddress() {
|
||||||
// Already set in assignAddress
|
// Already set in assignAddress
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
sect, n, va = assignAddress(ctxt, sect, n, tramp, va, true)
|
sect, n, va = assignAddress(ctxt, sect, n, tramp, va, true, big)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sect.Length = va - sect.Vaddr
|
|
||||||
ldr.SetSymSect(etext, sect)
|
|
||||||
if ldr.SymValue(etext) == 0 {
|
|
||||||
// Set the address of the start/end symbols, if not already
|
|
||||||
// (i.e. not darwin+dynlink or AIX+external, see above).
|
|
||||||
ldr.SetSymValue(etext, int64(va))
|
|
||||||
ldr.SetSymValue(text, int64(Segtext.Sections[0].Vaddr))
|
|
||||||
}
|
|
||||||
|
|
||||||
// merge tramps into Textp, keeping Textp in address order
|
// merge tramps into Textp, keeping Textp in address order
|
||||||
if ntramps != 0 {
|
if ntramps != 0 {
|
||||||
newtextp := make([]loader.Sym, 0, len(ctxt.Textp)+ntramps)
|
newtextp := make([]loader.Sym, 0, len(ctxt.Textp)+ntramps)
|
||||||
|
|
@ -2246,10 +2284,20 @@ func (ctxt *Link) textaddress() {
|
||||||
|
|
||||||
ctxt.Textp = newtextp
|
ctxt.Textp = newtextp
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sect.Length = va - sect.Vaddr
|
||||||
|
ldr.SetSymSect(etext, sect)
|
||||||
|
if ldr.SymValue(etext) == 0 {
|
||||||
|
// Set the address of the start/end symbols, if not already
|
||||||
|
// (i.e. not darwin+dynlink or AIX+external, see above).
|
||||||
|
ldr.SetSymValue(etext, int64(va))
|
||||||
|
ldr.SetSymValue(text, int64(Segtext.Sections[0].Vaddr))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// assigns address for a text symbol, returns (possibly new) section, its number, and the address
|
// assigns address for a text symbol, returns (possibly new) section, its number, and the address
|
||||||
func assignAddress(ctxt *Link, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64) {
|
func assignAddress(ctxt *Link, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp, big bool) (*sym.Section, int, uint64) {
|
||||||
ldr := ctxt.loader
|
ldr := ctxt.loader
|
||||||
if thearch.AssignAddress != nil {
|
if thearch.AssignAddress != nil {
|
||||||
return thearch.AssignAddress(ldr, sect, n, s, va, isTramp)
|
return thearch.AssignAddress(ldr, sect, n, s, va, isTramp)
|
||||||
|
|
@ -2278,19 +2326,18 @@ func assignAddress(ctxt *Link, sect *sym.Section, n int, s loader.Sym, va uint64
|
||||||
// call target offset field in the bl instruction. Splitting into smaller text
|
// call target offset field in the bl instruction. Splitting into smaller text
|
||||||
// sections smaller than this limit allows the GNU linker to modify the long calls
|
// sections smaller than this limit allows the GNU linker to modify the long calls
|
||||||
// appropriately. The limit allows for the space needed for tables inserted by the linker.
|
// appropriately. The limit allows for the space needed for tables inserted by the linker.
|
||||||
|
//
|
||||||
// If this function doesn't fit in the current text section, then create a new one.
|
// If this function doesn't fit in the current text section, then create a new one.
|
||||||
|
//
|
||||||
// Only break at outermost syms.
|
// Only break at outermost syms.
|
||||||
|
if ctxt.Arch.InFamily(sys.PPC64) && ldr.OuterSym(s) == 0 && ctxt.IsExternal() && big {
|
||||||
// For debugging purposes, allow text size limit to be cranked down,
|
// For debugging purposes, allow text size limit to be cranked down,
|
||||||
// so as to stress test the code that handles multiple text sections.
|
// so as to stress test the code that handles multiple text sections.
|
||||||
var textSizelimit uint64 = 0x1c00000
|
var textSizelimit uint64 = thearch.TrampLimit
|
||||||
if *FlagDebugTextSize != 0 {
|
if *FlagDebugTextSize != 0 {
|
||||||
textSizelimit = uint64(*FlagDebugTextSize)
|
textSizelimit = uint64(*FlagDebugTextSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ctxt.Arch.InFamily(sys.PPC64) && ldr.OuterSym(s) == 0 && ctxt.IsExternal() {
|
|
||||||
// Sanity check: make sure the limit is larger than any
|
// Sanity check: make sure the limit is larger than any
|
||||||
// individual text symbol.
|
// individual text symbol.
|
||||||
if funcsize > textSizelimit {
|
if funcsize > textSizelimit {
|
||||||
|
|
@ -2346,6 +2393,9 @@ func assignAddress(ctxt *Link, sect *sym.Section, n int, s loader.Sym, va uint64
|
||||||
ldr.SetSymValue(s, 0)
|
ldr.SetSymValue(s, 0)
|
||||||
for sub := s; sub != 0; sub = ldr.SubSym(sub) {
|
for sub := s; sub != 0; sub = ldr.SubSym(sub) {
|
||||||
ldr.SetSymValue(sub, ldr.SymValue(sub)+int64(va))
|
ldr.SetSymValue(sub, ldr.SymValue(sub)+int64(va))
|
||||||
|
if ctxt.Debugvlog > 2 {
|
||||||
|
fmt.Println("assign text address:", ldr.SymName(sub), ldr.SymValue(sub))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
va += funcsize
|
va += funcsize
|
||||||
|
|
|
||||||
|
|
@ -182,6 +182,13 @@ type Arch struct {
|
||||||
Minalign int
|
Minalign int
|
||||||
Dwarfregsp int
|
Dwarfregsp int
|
||||||
Dwarfreglr int
|
Dwarfreglr int
|
||||||
|
|
||||||
|
// Threshold of total text size, used for trampoline insertion. If the total
|
||||||
|
// text size is smaller than TrampLimit, we won't need to insert trampolines.
|
||||||
|
// It is pretty close to the offset range of a direct CALL machine instruction.
|
||||||
|
// We leave some room for extra stuff like PLT stubs.
|
||||||
|
TrampLimit uint64
|
||||||
|
|
||||||
Androiddynld string
|
Androiddynld string
|
||||||
Linuxdynld string
|
Linuxdynld string
|
||||||
Freebsddynld string
|
Freebsddynld string
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,7 @@ func Init() (*sys.Arch, ld.Arch) {
|
||||||
Minalign: minAlign,
|
Minalign: minAlign,
|
||||||
Dwarfregsp: dwarfRegSP,
|
Dwarfregsp: dwarfRegSP,
|
||||||
Dwarfreglr: dwarfRegLR,
|
Dwarfreglr: dwarfRegLR,
|
||||||
|
TrampLimit: 0x1c00000,
|
||||||
WriteTextBlocks: true,
|
WriteTextBlocks: true,
|
||||||
|
|
||||||
Adddynrel: adddynrel,
|
Adddynrel: adddynrel,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue