mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: print a stack trace at "morestack on g0"
Error like "morestack on g0" is one of the errors that is very hard to debug, because often it doesn't print a useful stack trace. The runtime doesn't directly print a stack trace because it is a bad stack state to call print. Sometimes the SIGABRT may trigger a traceback, but sometimes not especially in a cgo binary. Even if it triggers a traceback it often does not include the stack trace of the bad stack. This CL makes it explicitly print a stack trace and throw. The idea is to have some space as an "emergency" crash stack. When the stack is in a really bad state, we switch to the crash stack and do a traceback. Currently only implemented on AMD64 and ARM64. TODO: also handle errors like "morestack on gsignal" and bad systemstack. Also handle other architectures. Change-Id: Ibfc397202f2bb0737c5cbe99f2763de83301c1c1 Reviewed-on: https://go-review.googlesource.com/c/go/+/419435 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
parent
29b80397a8
commit
0262ea1ff9
6 changed files with 147 additions and 27 deletions
|
|
@ -12,3 +12,11 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0
|
|||
// See map.go comment on the need for this routine.
|
||||
TEXT ·mapinitnoop<ABIInternal>(SB),NOSPLIT,$0-0
|
||||
RET
|
||||
|
||||
#ifndef GOARCH_amd64
|
||||
#ifndef GOARCH_arm64
|
||||
// stub to appease shared build mode.
|
||||
TEXT ·switchToCrashStack0<ABIInternal>(SB),NOSPLIT,$0-0
|
||||
UNDEF
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -537,6 +537,30 @@ bad:
|
|||
CALL AX
|
||||
INT $3
|
||||
|
||||
// func switchToCrashStack0(fn func())
|
||||
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
|
||||
MOVQ g_m(R14), BX // curm
|
||||
|
||||
// set g to gcrash
|
||||
LEAQ runtime·gcrash(SB), R14 // g = &gcrash
|
||||
MOVQ BX, g_m(R14) // g.m = curm
|
||||
MOVQ R14, m_g0(BX) // curm.g0 = g
|
||||
get_tls(CX)
|
||||
MOVQ R14, g(CX)
|
||||
|
||||
// switch to crashstack
|
||||
MOVQ (g_stack+stack_hi)(R14), BX
|
||||
SUBQ $(4*8), BX
|
||||
MOVQ BX, SP
|
||||
|
||||
// call target function
|
||||
MOVQ AX, DX
|
||||
MOVQ 0(AX), AX
|
||||
CALL AX
|
||||
|
||||
// should never return
|
||||
CALL runtime·abort(SB)
|
||||
UNDEF
|
||||
|
||||
/*
|
||||
* support for morestack
|
||||
|
|
@ -551,17 +575,26 @@ bad:
|
|||
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
|
||||
// Cannot grow scheduler stack (m->g0).
|
||||
get_tls(CX)
|
||||
MOVQ g(CX), BX
|
||||
MOVQ g_m(BX), BX
|
||||
MOVQ m_g0(BX), SI
|
||||
CMPQ g(CX), SI
|
||||
MOVQ g(CX), DI // DI = g
|
||||
MOVQ g_m(DI), BX // BX = m
|
||||
|
||||
// Set g->sched to context in f.
|
||||
MOVQ 0(SP), AX // f's PC
|
||||
MOVQ AX, (g_sched+gobuf_pc)(DI)
|
||||
LEAQ 8(SP), AX // f's SP
|
||||
MOVQ AX, (g_sched+gobuf_sp)(DI)
|
||||
MOVQ BP, (g_sched+gobuf_bp)(DI)
|
||||
MOVQ DX, (g_sched+gobuf_ctxt)(DI)
|
||||
|
||||
MOVQ m_g0(BX), SI // SI = m.g0
|
||||
CMPQ DI, SI
|
||||
JNE 3(PC)
|
||||
CALL runtime·badmorestackg0(SB)
|
||||
CALL runtime·abort(SB)
|
||||
|
||||
// Cannot grow signal stack (m->gsignal).
|
||||
MOVQ m_gsignal(BX), SI
|
||||
CMPQ g(CX), SI
|
||||
CMPQ DI, SI
|
||||
JNE 3(PC)
|
||||
CALL runtime·badmorestackgsignal(SB)
|
||||
CALL runtime·abort(SB)
|
||||
|
|
@ -573,17 +606,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
|
|||
MOVQ AX, (m_morebuf+gobuf_pc)(BX)
|
||||
LEAQ 16(SP), AX // f's caller's SP
|
||||
MOVQ AX, (m_morebuf+gobuf_sp)(BX)
|
||||
get_tls(CX)
|
||||
MOVQ g(CX), SI
|
||||
MOVQ SI, (m_morebuf+gobuf_g)(BX)
|
||||
|
||||
// Set g->sched to context in f.
|
||||
MOVQ 0(SP), AX // f's PC
|
||||
MOVQ AX, (g_sched+gobuf_pc)(SI)
|
||||
LEAQ 8(SP), AX // f's SP
|
||||
MOVQ AX, (g_sched+gobuf_sp)(SI)
|
||||
MOVQ BP, (g_sched+gobuf_bp)(SI)
|
||||
MOVQ DX, (g_sched+gobuf_ctxt)(SI)
|
||||
MOVQ DI, (m_morebuf+gobuf_g)(BX)
|
||||
|
||||
// Call newstack on m->g0's stack.
|
||||
MOVQ m_g0(BX), BX
|
||||
|
|
|
|||
|
|
@ -262,6 +262,30 @@ noswitch:
|
|||
SUB $8, RSP, R29 // restore FP
|
||||
B (R3)
|
||||
|
||||
// func switchToCrashStack0(fn func())
|
||||
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
|
||||
MOVD R0, R26 // context register
|
||||
MOVD g_m(g), R1 // curm
|
||||
|
||||
// set g to gcrash
|
||||
MOVD $runtime·gcrash(SB), g // g = &gcrash
|
||||
BL runtime·save_g(SB) // clobbers R0
|
||||
MOVD R1, g_m(g) // g.m = curm
|
||||
MOVD g, m_g0(R1) // curm.g0 = g
|
||||
|
||||
// switch to crashstack
|
||||
MOVD (g_stack+stack_hi)(g), R1
|
||||
SUB $(4*8), R1
|
||||
MOVD R1, RSP
|
||||
|
||||
// call target function
|
||||
MOVD 0(R26), R0
|
||||
CALL (R0)
|
||||
|
||||
// should never return
|
||||
CALL runtime·abort(SB)
|
||||
UNDEF
|
||||
|
||||
/*
|
||||
* support for morestack
|
||||
*/
|
||||
|
|
@ -278,6 +302,16 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
|
|||
// Cannot grow scheduler stack (m->g0).
|
||||
MOVD g_m(g), R8
|
||||
MOVD m_g0(R8), R4
|
||||
|
||||
// Called from f.
|
||||
// Set g->sched to context in f
|
||||
MOVD RSP, R0
|
||||
MOVD R0, (g_sched+gobuf_sp)(g)
|
||||
MOVD R29, (g_sched+gobuf_bp)(g)
|
||||
MOVD LR, (g_sched+gobuf_pc)(g)
|
||||
MOVD R3, (g_sched+gobuf_lr)(g)
|
||||
MOVD R26, (g_sched+gobuf_ctxt)(g)
|
||||
|
||||
CMP g, R4
|
||||
BNE 3(PC)
|
||||
BL runtime·badmorestackg0(SB)
|
||||
|
|
@ -290,15 +324,6 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
|
|||
BL runtime·badmorestackgsignal(SB)
|
||||
B runtime·abort(SB)
|
||||
|
||||
// Called from f.
|
||||
// Set g->sched to context in f
|
||||
MOVD RSP, R0
|
||||
MOVD R0, (g_sched+gobuf_sp)(g)
|
||||
MOVD R29, (g_sched+gobuf_bp)(g)
|
||||
MOVD LR, (g_sched+gobuf_pc)(g)
|
||||
MOVD R3, (g_sched+gobuf_lr)(g)
|
||||
MOVD R26, (g_sched+gobuf_ctxt)(g)
|
||||
|
||||
// Called from f.
|
||||
// Set m->morebuf to f's callers.
|
||||
MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
|
||||
|
|
|
|||
|
|
@ -804,6 +804,14 @@ func TestG0StackOverflow(t *testing.T) {
|
|||
if n := strings.Count(string(out), "morestack on g0\n"); n != 1 {
|
||||
t.Fatalf("%s\n(exit status %v)", out, err)
|
||||
}
|
||||
if runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64" {
|
||||
// check for a stack trace
|
||||
want := "runtime.stackOverflow"
|
||||
if n := strings.Count(string(out), want); n < 5 {
|
||||
t.Errorf("output does not contain %q at least 5 times:\n%s", want, out)
|
||||
}
|
||||
return // it's not a signal-style traceback
|
||||
}
|
||||
// Check that it's a signal-style traceback.
|
||||
if runtime.GOOS != "windows" {
|
||||
if want := "PC="; !strings.Contains(string(out), want) {
|
||||
|
|
|
|||
|
|
@ -694,7 +694,7 @@ func G0StackOverflow() {
|
|||
// The stack bounds for g0 stack is not always precise.
|
||||
// Use an artificially small stack, to trigger a stack overflow
|
||||
// without actually run out of the system stack (which may seg fault).
|
||||
g0.stack.lo = sp - 4096
|
||||
g0.stack.lo = sp - 4096 - stackSystem
|
||||
g0.stackguard0 = g0.stack.lo + stackGuard
|
||||
g0.stackguard1 = g0.stackguard0
|
||||
|
||||
|
|
|
|||
|
|
@ -516,7 +516,20 @@ func badreflectcall() {
|
|||
//go:nosplit
|
||||
//go:nowritebarrierrec
|
||||
func badmorestackg0() {
|
||||
writeErrStr("fatal: morestack on g0\n")
|
||||
if !crashStackImplemented {
|
||||
writeErrStr("fatal: morestack on g0\n")
|
||||
return
|
||||
}
|
||||
|
||||
g := getg()
|
||||
switchToCrashStack(func() {
|
||||
print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n")
|
||||
g.m.traceback = 2 // include pc and sp in stack trace
|
||||
traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0)
|
||||
print("\n")
|
||||
|
||||
throw("morestack on g0")
|
||||
})
|
||||
}
|
||||
|
||||
//go:nosplit
|
||||
|
|
@ -530,6 +543,49 @@ func badctxt() {
|
|||
throw("ctxt != 0")
|
||||
}
|
||||
|
||||
// crashstack is a space that can be used as the stack when it is
|
||||
// crashing on bad stack conditions, e.g. morestack on g0.
|
||||
// gcrash is the corresponding (fake) g.
|
||||
var crashstack [16384]byte
|
||||
|
||||
var gcrash = g{
|
||||
stack: stack{uintptr(unsafe.Pointer(&crashstack)), uintptr(unsafe.Pointer(&crashstack)) + unsafe.Sizeof(crashstack)},
|
||||
stackguard0: uintptr(unsafe.Pointer(&crashstack)) + 1000,
|
||||
stackguard1: uintptr(unsafe.Pointer(&crashstack)) + 1000,
|
||||
}
|
||||
|
||||
var crashingG atomic.Pointer[g]
|
||||
|
||||
// Switch to crashstack and call fn, with special handling of
|
||||
// concurrent and recursive cases.
|
||||
//
|
||||
// Nosplit as it is called in a bad stack condition (we know
|
||||
// morestack would fail).
|
||||
//
|
||||
//go:nosplit
|
||||
//go:nowritebarrierrec
|
||||
func switchToCrashStack(fn func()) {
|
||||
me := getg()
|
||||
if crashingG.CompareAndSwapNoWB(nil, me) {
|
||||
switchToCrashStack0(fn) // should never return
|
||||
abort()
|
||||
}
|
||||
if crashingG.Load() == me {
|
||||
// recursive crashing. too bad.
|
||||
writeErrStr("fatal: recursive switchToCrashStack\n")
|
||||
abort()
|
||||
}
|
||||
// Another g is crashing. Give it some time, hopefully it will finish traceback.
|
||||
usleep_no_g(100)
|
||||
writeErrStr("fatal: concurrent switchToCrashStack\n")
|
||||
abort()
|
||||
}
|
||||
|
||||
const crashStackImplemented = GOARCH == "amd64" || GOARCH == "arm64"
|
||||
|
||||
//go:noescape
|
||||
func switchToCrashStack0(func()) // in assembly
|
||||
|
||||
func lockedOSThread() bool {
|
||||
gp := getg()
|
||||
return gp.lockedm != 0 && gp.m.lockedg != 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue