runtime: log all thread stack traces during GODEBUG=crash on Unix

This extends https://golang.org/cl/2811, which only applied to Darwin and GNU/Linux, to all Unix systems. Fixes #9591. Change-Id: Iec3fb438564ba2924b15b447c0480f87c0bfd009 Reviewed-on: https://go-review.googlesource.com/12661 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Matthew Dempsky <mdempsky@google.com> Reviewed-by: Russ Cox <rsc@golang.org>
2025-12-08 06:10:04 +00:00 · 2015-07-24 16:16:39 -07:00 · 2015-07-24 16:16:39 -07:00 · f0876a1a94
commit f0876a1a94
parent a2cf0568e8
19 changed files with 380 additions and 51 deletions
--- a/src/runtime/signal_arm.go
+++ b/src/runtime/signal_arm.go
@ -32,6 +32,8 @@ func dumpregs(c *sigctxt) {
 	print("fault   ", hex(c.fault()), "\n")
 }

+var crashing int32
+
 // May run during STW, so write barriers are not allowed.
 //go:nowritebarrier
 func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
@ -106,7 +108,10 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {

 	_g_.m.throwing = 1
 	_g_.m.caughtsig.set(gp)
-	startpanic()
+
+	if crashing == 0 {
+		startpanic()
+	}

 	if sig < uint32(len(sigtable)) {
 		print(sigtable[sig].name, "\n")
@ -114,7 +119,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 		print("Signal ", sig, "\n")
 	}

-	print("PC=", hex(c.pc()), "\n")
+	print("PC=", hex(c.pc()), " m=", _g_.m.id, "\n")
 	if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
 		print("signal arrived during cgo execution\n")
 		gp = _g_.m.lockedg
@ -125,12 +130,34 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 	if gotraceback(&docrash) > 0 {
 		goroutineheader(gp)
 		tracebacktrap(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.lr()), gp)
-		tracebackothers(gp)
-		print("\n")
+		if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
+			// tracebackothers on original m skipped this one; trace it now.
+			goroutineheader(_g_.m.curg)
+			traceback(^uintptr(0), ^uintptr(0), 0, gp)
+		} else if crashing == 0 {
+			tracebackothers(gp)
+			print("\n")
+		}
 		dumpregs(c)
 	}

 	if docrash {
+		crashing++
+		if crashing < sched.mcount {
+			// There are other m's that need to dump their stacks.
+			// Relay SIGQUIT to the next m by sending it to the current process.
+			// All m's that have already received SIGQUIT have signal masks blocking
+			// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
+			// When the last m receives the SIGQUIT, it will fall through to the call to
+			// crash below. Just in case the relaying gets botched, each m involved in
+			// the relay sleeps for 5 seconds and then does the crash/exit itself.
+			// In expected operation, the last m has received the SIGQUIT and run
+			// crash/exit and the process is gone, all long before any of the
+			// 5-second sleeps have finished.
+			print("\n-----\n\n")
+			raiseproc(_SIGQUIT)
+			usleep(5 * 1000 * 1000)
+		}
 		crash()
 	}