[dev.link] cmd/link: stream out external relocations on AMD64 ELF

Currently, when external linking, in relocsym (in asmb pass), we
convert Go relocations to an in-memory representation of external
relocations, and then in asmb2 pass we write them out to the
output file. This is not memory efficient.

This CL makes it not do the conversion but directly stream out
the external relocations based on Go relocations. Currently only
do this on AMD64 ELF systems.

This reduces memory usage, but makes the asmb2 pass a little
slower.

Linking cmd/compile with external linking:

name             old time/op    new time/op    delta
Asmb_GC            83.8ms ± 7%    70.4ms ± 4%  -16.03%  (p=0.008 n=5+5)
Asmb2_GC           95.6ms ± 4%   118.2ms ± 5%  +23.65%  (p=0.008 n=5+5)
TotalTime_GC        1.59s ± 2%     1.62s ± 1%     ~     (p=0.151 n=5+5)

name             old alloc/op   new alloc/op   delta
Asmb_GC            26.0MB ± 0%     4.1MB ± 0%  -84.15%  (p=0.008 n=5+5)
Asmb2_GC           8.19MB ± 0%    8.18MB ± 0%     ~     (p=0.222 n=5+5)

name             old live-B     new live-B     delta
Asmb_GC             49.2M ± 0%     27.4M ± 0%  -44.38%  (p=0.008 n=5+5)
Asmb2_GC            51.5M ± 0%     29.7M ± 0%  -42.33%  (p=0.008 n=5+5)

TODO: figure out what is slow. Possible improvements:
- Remove redundant work in relocsym.
- Maybe there is a better representation for external relocations
  now.
- Fine-grained parallelism in emitting external relocations.
- The old elfrelocsect only iterates over external relocations,
  now we iterate over all relocations. Is it too many?

Change-Id: Ib0a8ee8c88d65864c62b89a8d634614f7f2c813e
Reviewed-on: https://go-review.googlesource.com/c/go/+/242603
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Jeremy Faller <jeremy@golang.org>
This commit is contained in:
Cherry Zhang 2020-07-14 14:46:59 -04:00
parent 4f217d5aaa
commit 88382a9f97
4 changed files with 170 additions and 21 deletions

View file

@ -19,7 +19,7 @@ import (
// This function handles the first part. // This function handles the first part.
func asmb(ctxt *Link) { func asmb(ctxt *Link) {
ctxt.loader.InitOutData() ctxt.loader.InitOutData()
if ctxt.IsExternal() { if ctxt.IsExternal() && !(ctxt.IsAMD64() && ctxt.IsELF) {
ctxt.loader.InitExtRelocs() ctxt.loader.InitExtRelocs()
} }

View file

@ -159,7 +159,7 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
target := st.target target := st.target
syms := st.syms syms := st.syms
var extRelocs []loader.ExtReloc var extRelocs []loader.ExtReloc
if target.IsExternal() { if target.IsExternal() && !(target.IsAMD64() && target.IsELF) {
// preallocate a slice conservatively assuming that all // preallocate a slice conservatively assuming that all
// relocs will require an external reloc // relocs will require an external reloc
extRelocs = st.preallocExtRelocSlice(relocs.Count()) extRelocs = st.preallocExtRelocSlice(relocs.Count())
@ -592,16 +592,137 @@ func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
addExtReloc: addExtReloc:
if needExtReloc { if needExtReloc {
extRelocs = append(extRelocs, rr) if target.IsAMD64() && target.IsELF {
extraExtReloc++
} else {
extRelocs = append(extRelocs, rr)
}
} }
} }
if len(extRelocs) != 0 { if target.IsExternal() && target.IsAMD64() && target.IsELF {
// On AMD64 ELF, we'll stream out the external relocations in elfrelocsect
// and we only need the count here.
// TODO: just count, but not compute the external relocations. For now it
// is still needed on other platforms, and this keeps the code simple.
atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(extraExtReloc))
} else if len(extRelocs) != 0 {
st.finalizeExtRelocSlice(extRelocs) st.finalizeExtRelocSlice(extRelocs)
ldr.SetExtRelocs(s, extRelocs) ldr.SetExtRelocs(s, extRelocs)
atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(len(extRelocs)+extraExtReloc)) atomic.AddUint32(&ldr.SymSect(s).Relcount, uint32(len(extRelocs)+extraExtReloc))
} }
} }
// Convert a Go relocation to an external relocation.
func extreloc(ctxt *Link, ldr *loader.Loader, s loader.Sym, r loader.Reloc2, ri int) (loader.ExtReloc, bool) {
var rr loader.ExtReloc
target := ctxt.Target
siz := int32(r.Siz())
if siz == 0 { // informational relocation - no work to do
return rr, false
}
rt := r.Type()
if rt >= objabi.ElfRelocOffset {
return rr, false
}
rr.Idx = ri
// TODO(mundaym): remove this special case - see issue 14218.
if target.IsS390X() {
switch rt {
case objabi.R_PCRELDBL:
rt = objabi.R_PCREL
}
}
switch rt {
default:
// TODO: handle arch-specific relocations
panic("unsupported")
case objabi.R_TLS_LE, objabi.R_TLS_IE:
if target.IsElf() {
rs := ldr.ResolveABIAlias(r.Sym())
rr.Xsym = rs
if rr.Xsym == 0 {
rr.Xsym = ctxt.Tlsg
}
rr.Xadd = r.Add()
break
}
return rr, false
case objabi.R_ADDR:
// set up addend for eventual relocation via outer symbol.
rs := ldr.ResolveABIAlias(r.Sym())
rs, off := FoldSubSymbolOffset(ldr, rs)
rr.Xadd = r.Add() + off
rst := ldr.SymType(rs)
if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && rst != sym.SUNDEFEXT && ldr.SymSect(rs) == nil {
ldr.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
}
rr.Xsym = rs
case objabi.R_DWARFSECREF:
// On most platforms, the external linker needs to adjust DWARF references
// as it combines DWARF sections. However, on Darwin, dsymutil does the
// DWARF linking, and it understands how to follow section offsets.
// Leaving in the relocation records confuses it (see
// https://golang.org/issue/22068) so drop them for Darwin.
if target.IsDarwin() {
return rr, false
}
rs := ldr.ResolveABIAlias(r.Sym())
rr.Xsym = loader.Sym(ldr.SymSect(rs).Sym)
rr.Xadd = r.Add() + ldr.SymValue(rs) - int64(ldr.SymSect(rs).Vaddr)
// r.Sym() can be 0 when CALL $(constant) is transformed from absolute PC to relative PC call.
case objabi.R_GOTPCREL, objabi.R_CALL, objabi.R_PCREL:
rs := ldr.ResolveABIAlias(r.Sym())
if rt == objabi.R_GOTPCREL && target.IsDynlinkingGo() && target.IsDarwin() && rs != 0 {
rr.Xadd = r.Add()
rr.Xadd -= int64(siz) // relative to address after the relocated chunk
rr.Xsym = rs
break
}
if rs != 0 && ldr.SymType(rs) == sym.SUNDEFEXT {
// pass through to the external linker.
rr.Xadd = 0
if target.IsElf() {
rr.Xadd -= int64(siz)
}
rr.Xsym = rs
break
}
if rs != 0 && (ldr.SymSect(rs) != ldr.SymSect(s) || rt == objabi.R_GOTPCREL) {
// set up addend for eventual relocation via outer symbol.
rs := rs
rs, off := FoldSubSymbolOffset(ldr, rs)
rr.Xadd = r.Add() + off
rr.Xadd -= int64(siz) // relative to address after the relocated chunk
rst := ldr.SymType(rs)
if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && ldr.SymSect(rs) == nil {
ldr.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
}
rr.Xsym = rs
break
}
return rr, false
case objabi.R_XCOFFREF:
rs := ldr.ResolveABIAlias(r.Sym())
rr.Xsym = rs
rr.Xadd = r.Add()
// These reloc types don't need external relocations.
case objabi.R_ADDROFF, objabi.R_WEAKADDROFF, objabi.R_METHODOFF, objabi.R_ADDRCUOFF,
objabi.R_SIZE, objabi.R_CONST, objabi.R_GOTOFF:
return rr, false
}
return rr, true
}
const extRelocSlabSize = 2048 const extRelocSlabSize = 2048
// relocSymState hold state information needed when making a series of // relocSymState hold state information needed when making a series of

View file

@ -1372,22 +1372,50 @@ func elfrelocsect(ctxt *Link, out *OutBuf, sect *sym.Section, syms []loader.Sym)
break break
} }
relocs := ldr.ExtRelocs(s) if ctxt.IsAMD64() {
for ri := 0; ri < relocs.Count(); ri++ { // Compute external relocations on the go, and pass to Elfreloc1
r := relocs.At(ri) // to stream out.
if r.Xsym == 0 { relocs := ldr.Relocs(s)
ldr.Errorf(s, "missing xsym in relocation") for ri := 0; ri < relocs.Count(); ri++ {
continue r := relocs.At2(ri)
rr, ok := extreloc(ctxt, ldr, s, r, ri)
if !ok {
continue
}
if rr.Xsym == 0 {
ldr.Errorf(s, "missing xsym in relocation")
continue
}
esr := ElfSymForReloc(ctxt, rr.Xsym)
if esr == 0 {
ldr.Errorf(s, "reloc %d (%s) to non-elf symbol %s (outer=%s) %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Sym()), ldr.SymName(rr.Xsym), ldr.SymType(r.Sym()), ldr.SymType(r.Sym()).String())
}
if !ldr.AttrReachable(rr.Xsym) {
ldr.Errorf(s, "unreachable reloc %d (%s) target %v", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(rr.Xsym))
}
rv := loader.ExtRelocView{Reloc2: r, ExtReloc: rr}
if !thearch.Elfreloc1(ctxt, out, ldr, s, rv, int64(uint64(ldr.SymValue(s)+int64(r.Off()))-sect.Vaddr)) {
ldr.Errorf(s, "unsupported obj reloc %d (%s)/%d to %s", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), r.Siz(), ldr.SymName(r.Sym()))
}
} }
esr := ElfSymForReloc(ctxt, r.Xsym) } else {
if esr == 0 { relocs := ldr.ExtRelocs(s)
ldr.Errorf(s, "reloc %d (%s) to non-elf symbol %s (outer=%s) %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Sym()), ldr.SymName(r.Xsym), ldr.SymType(r.Sym()), ldr.SymType(r.Sym()).String()) for ri := 0; ri < relocs.Count(); ri++ {
} r := relocs.At(ri)
if !ldr.AttrReachable(r.Xsym) { if r.Xsym == 0 {
ldr.Errorf(s, "unreachable reloc %d (%s) target %v", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Xsym)) ldr.Errorf(s, "missing xsym in relocation")
} continue
if !thearch.Elfreloc1(ctxt, out, ldr, s, r, int64(uint64(ldr.SymValue(s)+int64(r.Off()))-sect.Vaddr)) { }
ldr.Errorf(s, "unsupported obj reloc %d (%s)/%d to %s", r.Type, sym.RelocName(ctxt.Arch, r.Type()), r.Siz(), ldr.SymName(r.Sym())) esr := ElfSymForReloc(ctxt, r.Xsym)
if esr == 0 {
ldr.Errorf(s, "reloc %d (%s) to non-elf symbol %s (outer=%s) %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Sym()), ldr.SymName(r.Xsym), ldr.SymType(r.Sym()), ldr.SymType(r.Sym()).String())
}
if !ldr.AttrReachable(r.Xsym) {
ldr.Errorf(s, "unreachable reloc %d (%s) target %v", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), ldr.SymName(r.Xsym))
}
if !thearch.Elfreloc1(ctxt, out, ldr, s, r, int64(uint64(ldr.SymValue(s)+int64(r.Off()))-sect.Vaddr)) {
ldr.Errorf(s, "unsupported obj reloc %d (%s)/%d to %s", r.Type(), sym.RelocName(ctxt.Arch, r.Type()), r.Siz(), ldr.SymName(r.Sym()))
}
} }
} }
} }

View file

@ -60,7 +60,7 @@ type ExtReloc struct {
// It is not the data structure used to store the payload internally. // It is not the data structure used to store the payload internally.
type ExtRelocView struct { type ExtRelocView struct {
Reloc2 Reloc2
*ExtReloc ExtReloc
} }
// Reloc2 holds a "handle" to access a relocation record from an // Reloc2 holds a "handle" to access a relocation record from an
@ -1909,7 +1909,7 @@ func (ers ExtRelocs) Count() int { return len(ers.es) }
func (ers ExtRelocs) At(j int) ExtRelocView { func (ers ExtRelocs) At(j int) ExtRelocView {
i := ers.es[j].Idx i := ers.es[j].Idx
return ExtRelocView{ers.rs.At2(i), &ers.es[j]} return ExtRelocView{ers.rs.At2(i), ers.es[j]}
} }
// RelocByOff implements sort.Interface for sorting relocations by offset. // RelocByOff implements sort.Interface for sorting relocations by offset.