mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.link] cmd/link: stream out external relocations on AMD64 ELF
Currently, when external linking, in relocsym (in asmb pass), we convert Go relocations to an in-memory representation of external relocations, and then in asmb2 pass we write them out to the output file. This is not memory efficient.

This CL makes it not do the conversion but directly stream out the external relocations based on Go relocations. Currently only do this on AMD64 ELF systems.

This reduces memory usage, but makes the asmb2 pass a little slower.

Linking cmd/compile with external linking:

name          old time/op   new time/op   delta
Asmb_GC       83.8ms ± 7%   70.4ms ± 4%   -16.03%  (p=0.008 n=5+5)
Asmb2_GC      95.6ms ± 4%   118.2ms ± 5%  +23.65%  (p=0.008 n=5+5)
TotalTime_GC  1.59s ± 2%    1.62s ± 1%    ~        (p=0.151 n=5+5)

name          old alloc/op  new alloc/op  delta
Asmb_GC       26.0MB ± 0%   4.1MB ± 0%    -84.15%  (p=0.008 n=5+5)
Asmb2_GC      8.19MB ± 0%   8.18MB ± 0%   ~        (p=0.222 n=5+5)

name          old live-B    new live-B    delta
Asmb_GC       49.2M ± 0%    27.4M ± 0%    -44.38%  (p=0.008 n=5+5)
Asmb2_GC      51.5M ± 0%    29.7M ± 0%    -42.33%  (p=0.008 n=5+5)

TODO: figure out what is slow. Possible improvements:
- Remove redundant work in relocsym.
- Maybe there is a better representation for external relocations now.
- Fine-grained parallelism in emitting external relocations.
- The old elfrelocsect only iterates over external relocations, now we iterate over all relocations. Is it too many?

Change-Id: Ib0a8ee8c88d65864c62b89a8d634614f7f2c813e
Reviewed-on: https://go-review.googlesource.com/c/go/+/242603
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
This commit is contained in:
parent
4f217d5aaa
commit
88382a9f97
4 changed files with 170 additions and 21 deletions
|
|
@@ -19,7 +19,7 @@ import (
|
||||||
// This function handles the first part.
|
// This function handles the first part.
|
||||||
func asmb(ctxt *Link) {
|
func asmb(ctxt *Link) {
|
||||||
ctxt.loader.InitOutData()
|
ctxt.loader.InitOutData()
|
||||||
if ctxt.IsExternal() {
|
if ctxt.IsExternal() && !(ctxt.IsAMD64() && ctxt.IsELF) {
|
||||||
ctxt.loader.InitExtRelocs()
|
ctxt.loader.InitExtRelocs()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -159,7 +159,7 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
|
||||||
target := st.target
|
target := st.target
|
||||||
syms := st.syms
|
syms := st.syms
|
||||||
var extRelocs []loader.ExtReloc
|
var extRelocs []loader.ExtReloc
|
||||||
if target.IsExternal() {
|
if target.IsExternal() && !(target.IsAMD64() && target.IsELF) {
|
||||||
// preallocate a slice conservatively assuming that all
|
// preallocate a slice conservatively assuming that all
|
||||||
// relocs will require an external reloc
|
// relocs will require an external reloc
|
||||||
extRelocs = st.preallocExtRelocSlice(relocs.Count())
|
extRelocs = st.preallocExtRelocSlice(relocs.Count())
|
||||||
|
|
@@ -592,16 +592,137 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
|
||||||
|
|
||||||
addExtReloc:
|
addExtReloc:
|
||||||
if needExtReloc {
|
if needExtReloc {
|
||||||
extRelocs = append(extRelocs, rr)
|
if target.IsAMD64() && target.IsELF {
|
||||||
|
extraExtReloc++
|
||||||
|
} else {
|
||||||
|
extRelocs = append(extRelocs, rr)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(extRelocs) != 0 {
|
if target.IsExternal() && target.IsAMD64() && target.IsELF {
|
||||||
|
// On AMD64 ELF, we'll stream out the external relocations in elfrelocsect
|
||||||
|
// and we only need the count here.
|
||||||
|
// TODO: just count, but not compute the external relocations. For now it
|
||||||
|
// is still needed on other platforms, and this keeps the code simple.
|
||||||
|
atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(extraExtReloc))
|
||||||
|
} else if len(extRelocs) != 0 {
|
||||||
st.finalizeExtRelocSlice(extRelocs)
|
st.finalizeExtRelocSlice(extRelocs)
|
||||||
ldr.SetExtRelocs(s, extRelocs)
|
ldr.SetExtRelocs(s, extRelocs)
|
||||||
atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(len(extRelocs)+extraExtReloc))
|
atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(len(extRelocs)+extraExtReloc))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert a Go relocation to an external relocation.
|
||||||
|
func extreloc(ctxt *Link, ldr *loader.Loader, s loader.Sym, r loader.Reloc2, ri int) (loader.ExtReloc, bool) {
|
||||||
|
var rr loader.ExtReloc
|
||||||
|
target := ctxt.Target
|
||||||
|
siz := int32(r.Siz())
|
||||||
|
if siz == 0 { // informational relocation - no work to do
|
||||||
|
return rr, false
|
||||||
|
}
|
||||||
|
|
||||||
|
rt := r.Type()
|
||||||
|
if rt >= objabi.ElfRelocOffset {
|
||||||
|
return rr, false
|
||||||
|
}
|
||||||
|
|
||||||
|
rr.Idx = ri
|
||||||
|
|
||||||
|
// TODO(mundaym): remove this special case - see issue 14218.
|
||||||
|
if target.IsS390X() {
|
||||||
|
switch rt {
|
||||||
|
case objabi.R_PCRELDBL:
|
||||||
|
rt = objabi.R_PCREL
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch rt {
|
||||||
|
default:
|
||||||
|
// TODO: handle arch-specific relocations
|
||||||
|
panic("unsupported")
|
||||||
|
|
||||||
|
case objabi.R_TLS_LE, objabi.R_TLS_IE:
|
||||||
|
if target.IsElf() {
|
||||||
|
rs := ldr.ResolveABIAlias(r.Sym())
|
||||||
|
rr.Xsym = rs
|
||||||
|
if rr.Xsym == 0 {
|
||||||
|
rr.Xsym = ctxt.Tlsg
|
||||||
|
}
|
||||||
|
rr.Xadd = r.Add()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return rr, false
|
||||||
|
|
||||||
|
case objabi.R_ADDR:
|
||||||
|
// set up addend for eventual relocation via outer symbol.
|
||||||
|
rs := ldr.ResolveABIAlias(r.Sym())
|
||||||
|
rs, off := FoldSubSymbolOffset(ldr, rs)
|
||||||
|
rr.Xadd = r.Add() + off
|
||||||
|
rst := ldr.SymType(rs)
|
||||||
|
if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && rst != sym.SUNDEFEXT && ldr.SymSect(rs) == nil {
|
||||||
|
ldr.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
|
||||||
|
}
|
||||||
|
rr.Xsym = rs
|
||||||
|
|
||||||
|
case objabi.R_DWARFSECREF:
|
||||||
|
// On most platforms, the external linker needs to adjust DWARF references
|
||||||
|
// as it combines DWARF sections. However, on Darwin, dsymutil does the
|
||||||
|
// DWARF linking, and it understands how to follow section offsets.
|
||||||
|
// Leaving in the relocation records confuses it (see
|
||||||
|
// https://golang.org/issue/22068) so drop them for Darwin.
|
||||||
|
if target.IsDarwin() {
|
||||||
|
return rr, false
|
||||||
|
}
|
||||||
|
rs := ldr.ResolveABIAlias(r.Sym())
|
||||||
|
rr.Xsym = loader.Sym(ldr.SymSect(rs).Sym)
|
||||||
|
rr.Xadd = r.Add() + ldr.SymValue(rs) - int64(ldr.SymSect(rs).Vaddr)
|
||||||
|
|
||||||
|
// r.Sym() can be 0 when CALL $(constant) is transformed from absolute PC to relative PC call.
|
||||||
|
case objabi.R_GOTPCREL, objabi.R_CALL, objabi.R_PCREL:
|
||||||
|
rs := ldr.ResolveABIAlias(r.Sym())
|
||||||
|
if rt == objabi.R_GOTPCREL && target.IsDynlinkingGo() && target.IsDarwin() && rs != 0 {
|
||||||
|
rr.Xadd = r.Add()
|
||||||
|
rr.Xadd -= int64(siz) // relative to address after the relocated chunk
|
||||||
|
rr.Xsym = rs
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if rs != 0 && ldr.SymType(rs) == sym.SUNDEFEXT {
|
||||||
|
// pass through to the external linker.
|
||||||
|
rr.Xadd = 0
|
||||||
|
if target.IsElf() {
|
||||||
|
rr.Xadd -= int64(siz)
|
||||||
|
}
|
||||||
|
rr.Xsym = rs
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if rs != 0 && (ldr.SymSect(rs) != ldr.SymSect(s) || rt == objabi.R_GOTPCREL) {
|
||||||
|
// set up addend for eventual relocation via outer symbol.
|
||||||
|
rs := rs
|
||||||
|
rs, off := FoldSubSymbolOffset(ldr, rs)
|
||||||
|
rr.Xadd = r.Add() + off
|
||||||
|
rr.Xadd -= int64(siz) // relative to address after the relocated chunk
|
||||||
|
rst := ldr.SymType(rs)
|
||||||
|
if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && ldr.SymSect(rs) == nil {
|
||||||
|
ldr.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
|
||||||
|
}
|
||||||
|
rr.Xsym = rs
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return rr, false
|
||||||
|
|
||||||
|
case objabi.R_XCOFFREF:
|
||||||
|
rs := ldr.ResolveABIAlias(r.Sym())
|
||||||
|
rr.Xsym = rs
|
||||||
|
rr.Xadd = r.Add()
|
||||||
|
|
||||||
|
// These reloc types don't need external relocations.
|
||||||
|
case objabi.R_ADDROFF, objabi.R_WEAKADDROFF, objabi.R_METHODOFF, objabi.R_ADDRCUOFF,
|
||||||
|
objabi.R_SIZE, objabi.R_CONST, objabi.R_GOTOFF:
|
||||||
|
return rr, false
|
||||||
|
}
|
||||||
|
return rr, true
|
||||||
|
}
|
||||||
|
|
||||||
const extRelocSlabSize = 2048
|
const extRelocSlabSize = 2048
|
||||||
|
|
||||||
// relocSymState hold state information needed when making a series of
|
// relocSymState hold state information needed when making a series of
|
||||||
|
|
|
||||||
|
|
@@ -1372,22 +1372,50 @@ func elfrelocsect(ctxt *Link, out *OutBuf, sect *sym.Section, syms []loader.Sym)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
relocs := ldr.ExtRelocs(s)
|
if ctxt.IsAMD64() {
|
||||||
for ri := 0; ri < relocs.Count(); ri++ {
|
// Compute external relocations on the go, and pass to Elfreloc1
|
||||||
r := relocs.At(ri)
|
// to stream out.
|
||||||
if r.Xsym == 0 {
|
relocs := ldr.Relocs(s)
|
||||||
ldr.Errorf(s, "missing xsym in relocation")
|
for ri := 0; ri < relocs.Count(); ri++ {
|
||||||
continue
|
r := relocs.At2(ri)
|
||||||
|
rr, ok := extreloc(ctxt, ldr, s, r, ri)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rr.Xsym == 0 {
|
||||||
|
ldr.Errorf(s, "missing xsym in relocation")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
esr := ElfSymForReloc(ctxt, rr.Xsym)
|
||||||
|
if esr == 0 {
|
||||||
|
ldr.Errorf(s, "reloc %d (%s) to non-elf symbol %s (outer=%s) %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Sym()), ldr.SymName(rr.Xsym), ldr.SymType(r.Sym()), ldr.SymType(r.Sym()).String())
|
||||||
|
}
|
||||||
|
if !ldr.AttrReachable(rr.Xsym) {
|
||||||
|
ldr.Errorf(s, "unreachable reloc %d (%s) target %v", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(rr.Xsym))
|
||||||
|
}
|
||||||
|
rv := loader.ExtRelocView{Reloc2: r, ExtReloc: rr}
|
||||||
|
if !thearch.Elfreloc1(ctxt, out, ldr, s, rv, int64(uint64(ldr.SymValue(s)+int64(r.Off()))-sect.Vaddr)) {
|
||||||
|
ldr.Errorf(s, "unsupported obj reloc %d (%s)/%d to %s", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), r.Siz(), ldr.SymName(r.Sym()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
esr := ElfSymForReloc(ctxt, r.Xsym)
|
} else {
|
||||||
if esr == 0 {
|
relocs := ldr.ExtRelocs(s)
|
||||||
ldr.Errorf(s, "reloc %d (%s) to non-elf symbol %s (outer=%s) %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Sym()), ldr.SymName(r.Xsym), ldr.SymType(r.Sym()), ldr.SymType(r.Sym()).String())
|
for ri := 0; ri < relocs.Count(); ri++ {
|
||||||
}
|
r := relocs.At(ri)
|
||||||
if !ldr.AttrReachable(r.Xsym) {
|
if r.Xsym == 0 {
|
||||||
ldr.Errorf(s, "unreachable reloc %d (%s) target %v", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Xsym))
|
ldr.Errorf(s, "missing xsym in relocation")
|
||||||
}
|
continue
|
||||||
if !thearch.Elfreloc1(ctxt, out, ldr, s, r, int64(uint64(ldr.SymValue(s)+int64(r.Off()))-sect.Vaddr)) {
|
}
|
||||||
ldr.Errorf(s, "unsupported obj reloc %d (%s)/%d to %s", r.Type, sym.RelocName(ctxt.Arch, r.Type()), r.Siz(), ldr.SymName(r.Sym()))
|
esr := ElfSymForReloc(ctxt, r.Xsym)
|
||||||
|
if esr == 0 {
|
||||||
|
ldr.Errorf(s, "reloc %d (%s) to non-elf symbol %s (outer=%s) %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Sym()), ldr.SymName(r.Xsym), ldr.SymType(r.Sym()), ldr.SymType(r.Sym()).String())
|
||||||
|
}
|
||||||
|
if !ldr.AttrReachable(r.Xsym) {
|
||||||
|
ldr.Errorf(s, "unreachable reloc %d (%s) target %v", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Xsym))
|
||||||
|
}
|
||||||
|
if !thearch.Elfreloc1(ctxt, out, ldr, s, r, int64(uint64(ldr.SymValue(s)+int64(r.Off()))-sect.Vaddr)) {
|
||||||
|
ldr.Errorf(s, "unsupported obj reloc %d (%s)/%d to %s", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), r.Siz(), ldr.SymName(r.Sym()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -60,7 +60,7 @@ type ExtReloc struct {
|
||||||
// It is not the data structure used to store the payload internally.
|
// It is not the data structure used to store the payload internally.
|
||||||
type ExtRelocView struct {
|
type ExtRelocView struct {
|
||||||
Reloc2
|
Reloc2
|
||||||
*ExtReloc
|
ExtReloc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reloc2 holds a "handle" to access a relocation record from an
|
// Reloc2 holds a "handle" to access a relocation record from an
|
||||||
|
|
@@ -1909,7 +1909,7 @@ func (ers ExtRelocs) Count() int { return len(ers.es) }
|
||||||
|
|
||||||
func (ers ExtRelocs) At(j int) ExtRelocView {
|
func (ers ExtRelocs) At(j int) ExtRelocView {
|
||||||
i := ers.es[j].Idx
|
i := ers.es[j].Idx
|
||||||
return ExtRelocView{ers.rs.At2(i), &ers.es[j]}
|
return ExtRelocView{ers.rs.At2(i), ers.es[j]}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RelocByOff implements sort.Interface for sorting relocations by offset.
|
// RelocByOff implements sort.Interface for sorting relocations by offset.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue