mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/link: faster algorithm for nosplit stack checking, better errors
The linker performs a global analysis of all nosplit call chains to
check they fit in the stack space ensured by splittable functions.
That analysis has two problems right now:
1. It's inefficient. It performs a top-down analysis, starting with
every nosplit function and the nosplit stack limit and walking *down*
the call graph to compute how much stack remains at every call. As a
result, it visits the same functions over and over, often with
different remaining stack depths. This approach is historical: this
check was originally written in C and this approach avoided the need
for any interesting data structures.
2. If some call chain is over the limit, it only reports a single call
chain. As a result, if the check does fail, you often wind up playing
whack-a-mole by guessing where the problem is in the one chain, trying
to reduce the stack size, and then seeing if the link works or reports
a different path.
This CL completely rewrites the nosplit stack check. It now uses a
bottom-up analysis, computing the maximum stack height required by
every function's call tree. This visits every function exactly once,
making it much more efficient. It uses slightly more heap space for
intermediate storage, but still very little in the scheme of the
overall link. For example, when linking cmd/go, the new algorithm
virtually eliminates the time spent in this pass, and reduces overall
link time:
           │   before    │                after                │
           │   sec/op    │   sec/op     vs base                │
Dostkcheck   7.926m ± 4%   1.831m ± 6%  -76.90% (p=0.000 n=20)
TotalTime    301.3m ± 1%   296.4m ± 3%   -1.62% (p=0.040 n=20)

           │    before    │                 after                  │
           │     B/op     │     B/op       vs base                 │
Dostkcheck   40.00Ki ± 0%   212.15Ki ± 0%  +430.37% (p=0.000 n=20)
Most of this time is spent analyzing the runtime, so for larger
binaries, the total time saved is roughly the same, and proportionally
less of the overall link.
If the new implementation finds an error, it redoes the analysis,
switching to preferring quality of error reporting over performance.
For error reporting, it computes stack depths top-down (like the old
algorithm), and reports *all* paths that are over the stack limit,
presented as a tree for compactness. For example, this is the output
from a simple test case from test/nosplit with two over-limit paths
from f1:
    main.f1: nosplit stack overflow
    main.f1
        grows 768 bytes, calls main.f2
            grows 56 bytes, calls main.f4
                grows 48 bytes
                80 bytes over limit
        grows 768 bytes, calls main.f3
            grows 104 bytes
            80 bytes over limit
While we're here, we do a few nice cleanups:
- We add a debug output flag, which will be useful for understanding
what our nosplit chains look like and which ones are close to
running over.
- We move the implementation out of the fog of lib.go to its own file.
- The implementation is generally more Go-like and less C-like.
Change-Id: If1ab31197f5215475559b93695c44a01bd16e276
Reviewed-on: https://go-review.googlesource.com/c/go/+/398176
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
parent
7a06243205
commit
e25f46e596
7 changed files with 575 additions and 221 deletions
|
|
@ -34,7 +34,6 @@ import (
|
|||
"bytes"
|
||||
"cmd/internal/bio"
|
||||
"cmd/internal/goobj"
|
||||
"cmd/internal/obj"
|
||||
"cmd/internal/objabi"
|
||||
"cmd/internal/sys"
|
||||
"cmd/link/internal/loadelf"
|
||||
|
|
@ -2343,223 +2342,6 @@ func addsection(ldr *loader.Loader, arch *sys.Arch, seg *sym.Segment, name strin
|
|||
return sect
|
||||
}
|
||||
|
||||
type chain struct {
|
||||
sym loader.Sym
|
||||
up *chain
|
||||
limit int // limit on entry to sym
|
||||
}
|
||||
|
||||
func callsize(ctxt *Link) int {
|
||||
if ctxt.Arch.HasLR {
|
||||
return 0
|
||||
}
|
||||
return ctxt.Arch.RegSize
|
||||
}
|
||||
|
||||
type stkChk struct {
|
||||
ldr *loader.Loader
|
||||
ctxt *Link
|
||||
morestack loader.Sym
|
||||
done loader.Bitmap
|
||||
}
|
||||
|
||||
// Walk the call tree and check that there is always enough stack space
|
||||
// for the call frames, especially for a chain of nosplit functions.
|
||||
func (ctxt *Link) dostkcheck() {
|
||||
ldr := ctxt.loader
|
||||
sc := stkChk{
|
||||
ldr: ldr,
|
||||
ctxt: ctxt,
|
||||
morestack: ldr.Lookup("runtime.morestack", 0),
|
||||
done: loader.MakeBitmap(ldr.NSym()),
|
||||
}
|
||||
|
||||
// Every splitting function ensures that there are at least StackLimit
|
||||
// bytes available below SP when the splitting prologue finishes.
|
||||
// If the splitting function calls F, then F begins execution with
|
||||
// at least StackLimit - callsize() bytes available.
|
||||
// Check that every function behaves correctly with this amount
|
||||
// of stack, following direct calls in order to piece together chains
|
||||
// of non-splitting functions.
|
||||
var ch chain
|
||||
ch.limit = objabi.StackLimit - callsize(ctxt)
|
||||
if buildcfg.GOARCH == "arm64" {
|
||||
// need extra 8 bytes below SP to save FP
|
||||
ch.limit -= 8
|
||||
}
|
||||
|
||||
// Check every function, but do the nosplit functions in a first pass,
|
||||
// to make the printed failure chains as short as possible.
|
||||
for _, s := range ctxt.Textp {
|
||||
if ldr.IsNoSplit(s) {
|
||||
ch.sym = s
|
||||
sc.check(&ch, 0)
|
||||
}
|
||||
}
|
||||
|
||||
for _, s := range ctxt.Textp {
|
||||
if !ldr.IsNoSplit(s) {
|
||||
ch.sym = s
|
||||
sc.check(&ch, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (sc *stkChk) check(up *chain, depth int) int {
|
||||
limit := up.limit
|
||||
s := up.sym
|
||||
ldr := sc.ldr
|
||||
ctxt := sc.ctxt
|
||||
|
||||
// Don't duplicate work: only need to consider each
|
||||
// function at top of safe zone once.
|
||||
top := limit == objabi.StackLimit-callsize(ctxt)
|
||||
if top {
|
||||
if sc.done.Has(s) {
|
||||
return 0
|
||||
}
|
||||
sc.done.Set(s)
|
||||
}
|
||||
|
||||
if depth > 500 {
|
||||
sc.ctxt.Errorf(s, "nosplit stack check too deep")
|
||||
sc.broke(up, 0)
|
||||
return -1
|
||||
}
|
||||
|
||||
if ldr.AttrExternal(s) {
|
||||
// external function.
|
||||
// should never be called directly.
|
||||
// onlyctxt.Diagnose the direct caller.
|
||||
// TODO(mwhudson): actually think about this.
|
||||
// TODO(khr): disabled for now. Calls to external functions can only happen on the g0 stack.
|
||||
// See the trampolines in src/runtime/sys_darwin_$ARCH.go.
|
||||
//if depth == 1 && ldr.SymType(s) != sym.SXREF && !ctxt.DynlinkingGo() &&
|
||||
// ctxt.BuildMode != BuildModeCArchive && ctxt.BuildMode != BuildModePIE && ctxt.BuildMode != BuildModeCShared && ctxt.BuildMode != BuildModePlugin {
|
||||
// Errorf(s, "call to external function")
|
||||
//}
|
||||
return -1
|
||||
}
|
||||
info := ldr.FuncInfo(s)
|
||||
if !info.Valid() { // external function. see above.
|
||||
return -1
|
||||
}
|
||||
|
||||
if limit < 0 {
|
||||
sc.broke(up, limit)
|
||||
return -1
|
||||
}
|
||||
|
||||
// morestack looks like it calls functions,
|
||||
// but it switches the stack pointer first.
|
||||
if s == sc.morestack {
|
||||
return 0
|
||||
}
|
||||
|
||||
var ch chain
|
||||
ch.up = up
|
||||
|
||||
if !ldr.IsNoSplit(s) {
|
||||
// Ensure we have enough stack to call morestack.
|
||||
ch.limit = limit - callsize(ctxt)
|
||||
ch.sym = sc.morestack
|
||||
if sc.check(&ch, depth+1) < 0 {
|
||||
return -1
|
||||
}
|
||||
if !top {
|
||||
return 0
|
||||
}
|
||||
// Raise limit to allow frame.
|
||||
locals := info.Locals()
|
||||
limit = objabi.StackLimit + int(locals) + int(ctxt.Arch.FixedFrameSize)
|
||||
}
|
||||
|
||||
// Walk through sp adjustments in function, consuming relocs.
|
||||
relocs := ldr.Relocs(s)
|
||||
var ch1 chain
|
||||
pcsp := obj.NewPCIter(uint32(ctxt.Arch.MinLC))
|
||||
ri := 0
|
||||
for pcsp.Init(ldr.Data(ldr.Pcsp(s))); !pcsp.Done; pcsp.Next() {
|
||||
// pcsp.value is in effect for [pcsp.pc, pcsp.nextpc).
|
||||
|
||||
// Check stack size in effect for this span.
|
||||
if int32(limit)-pcsp.Value < 0 {
|
||||
sc.broke(up, int(int32(limit)-pcsp.Value))
|
||||
return -1
|
||||
}
|
||||
|
||||
// Process calls in this span.
|
||||
for ; ri < relocs.Count(); ri++ {
|
||||
r := relocs.At(ri)
|
||||
if uint32(r.Off()) >= pcsp.NextPC {
|
||||
break
|
||||
}
|
||||
t := r.Type()
|
||||
switch {
|
||||
case t.IsDirectCall():
|
||||
ch.limit = int(int32(limit) - pcsp.Value - int32(callsize(ctxt)))
|
||||
ch.sym = r.Sym()
|
||||
if sc.check(&ch, depth+1) < 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
// Indirect call. Assume it is a call to a splitting function,
|
||||
// so we have to make sure it can call morestack.
|
||||
// Arrange the data structures to report both calls, so that
|
||||
// if there is an error, stkprint shows all the steps involved.
|
||||
case t == objabi.R_CALLIND:
|
||||
ch.limit = int(int32(limit) - pcsp.Value - int32(callsize(ctxt)))
|
||||
ch.sym = 0
|
||||
ch1.limit = ch.limit - callsize(ctxt) // for morestack in called prologue
|
||||
ch1.up = &ch
|
||||
ch1.sym = sc.morestack
|
||||
if sc.check(&ch1, depth+2) < 0 {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
func (sc *stkChk) broke(ch *chain, limit int) {
|
||||
sc.ctxt.Errorf(ch.sym, "nosplit stack overflow")
|
||||
sc.print(ch, limit)
|
||||
}
|
||||
|
||||
func (sc *stkChk) print(ch *chain, limit int) {
|
||||
ldr := sc.ldr
|
||||
ctxt := sc.ctxt
|
||||
var name string
|
||||
if ch.sym != 0 {
|
||||
name = fmt.Sprintf("%s<%d>", ldr.SymName(ch.sym), ldr.SymVersion(ch.sym))
|
||||
if ldr.IsNoSplit(ch.sym) {
|
||||
name += " (nosplit)"
|
||||
}
|
||||
} else {
|
||||
name = "function pointer"
|
||||
}
|
||||
|
||||
if ch.up == nil {
|
||||
// top of chain. ch.sym != 0.
|
||||
if ldr.IsNoSplit(ch.sym) {
|
||||
fmt.Printf("\t%d\tassumed on entry to %s\n", ch.limit, name)
|
||||
} else {
|
||||
fmt.Printf("\t%d\tguaranteed after split check in %s\n", ch.limit, name)
|
||||
}
|
||||
} else {
|
||||
sc.print(ch.up, ch.limit+callsize(ctxt))
|
||||
if !ctxt.Arch.HasLR {
|
||||
fmt.Printf("\t%d\ton entry to %s\n", ch.limit, name)
|
||||
}
|
||||
}
|
||||
|
||||
if ch.limit != limit {
|
||||
fmt.Printf("\t%d\tafter %s uses %d\n", limit, name, ch.limit-limit)
|
||||
}
|
||||
}
|
||||
|
||||
func usage() {
|
||||
fmt.Fprintf(os.Stderr, "usage: link [options] main.o\n")
|
||||
objabi.Flagprint(os.Stderr)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue