mirror of https://github.com/golang/go.git
syscall: syscall.AllThreadsSyscall signal handling fixes
The runtime support for syscall.AllThreadsSyscall() functions had
some corner case deadlock issues when signal handling was in use.
This was observed in at least 3 build test failures on ppc64 and
amd64 architecture CGO_ENABLED=0 builds over the last few months.

The fixes involve more controlled handling of signals while the
AllThreads mechanism is being executed. Further details are
discussed in bug #44193.

The all-threads syscall support is new in go1.16, so earlier
releases are not affected by this bug.

Fixes #44193

Change-Id: I01ba8508a6e1bb2d872751f50da86dd07911a41d
Reviewed-on: https://go-review.googlesource.com/c/go/+/305149
Reviewed-by: Michael Pratt <mpratt@google.com>
Trust: Michael Pratt <mpratt@google.com>
Trust: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
parent 54af9fd9e6
commit 7e97e4e8cc

4 changed files with 96 additions and 7 deletions
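For context before the diff, here is a minimal sketch (not part of this commit) of the user-facing API whose runtime support is being fixed. It assumes Linux and go1.16+ with cgo disabled; the PR_SET_KEEPCAPS use case and the constant value are illustrative only.

package main

import (
	"fmt"
	"syscall"
)

// prSetKeepCaps stands in for PR_SET_KEEPCAPS from <linux/prctl.h>; the
// value 8 and this particular use case are illustrative, not from the commit.
const prSetKeepCaps = 8

func main() {
	// AllThreadsSyscall issues the same system call on every thread the Go
	// runtime owns and only reports success if it succeeded everywhere.
	// With cgo enabled it returns ENOTSUP instead.
	_, _, errno := syscall.AllThreadsSyscall(syscall.SYS_PRCTL, prSetKeepCaps, 1, 0)
	if errno != 0 {
		fmt.Println("prctl(PR_SET_KEEPCAPS) failed:", errno)
		return
	}
	fmt.Println("PR_SET_KEEPCAPS enabled on every runtime thread")
}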
@@ -1402,6 +1402,9 @@ func mPark() {
 	g := getg()
 	for {
 		notesleep(&g.m.park)
+		// Note, because of signal handling by this parked m,
+		// a preemptive mDoFixup() may actually occur via
+		// mDoFixupAndOSYield(). (See golang.org/issue/44193)
 		noteclear(&g.m.park)
 		if !mDoFixup() {
 			return
@@ -1635,6 +1638,22 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
 	for atomic.Load(&sched.sysmonStarting) != 0 {
 		osyield()
 	}
+
+	// We don't want this thread to handle signals for the
+	// duration of this critical section. The underlying issue
+	// being that this locked coordinating m is the one monitoring
+	// for fn() execution by all the other m's of the runtime,
+	// while no regular go code execution is permitted (the world
+	// is stopped). If this present m were to get distracted to
+	// run signal handling code, and find itself waiting for a
+	// second thread to execute go code before being able to
+	// return from that signal handling, a deadlock will result.
+	// (See golang.org/issue/44193.)
+	lockOSThread()
+	var sigmask sigset
+	sigsave(&sigmask)
+	sigblock(false)
+
 	stopTheWorldGC("doAllThreadsSyscall")
 	if atomic.Load(&newmHandoff.haveTemplateThread) != 0 {
 		// Ensure that there are no in-flight thread
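The comment above motivates the sigsave/sigblock/msigrestore sequence, which is runtime-internal and not callable from Go programs. Purely as an illustration of the same save/block/restore shape on a locked OS thread, here is a hedged user-level sketch using golang.org/x/sys/unix.PthreadSigmask; it assumes linux/amd64 and the x/sys/unix module, and blocking every signal like this is not something ordinary programs should copy.

package main

import (
	"runtime"

	"golang.org/x/sys/unix"
)

// withSignalsBlocked runs critical() with all signals blocked on the calling
// OS thread, then restores the saved mask. Sketch only: the runtime blocks a
// carefully chosen signal set, while blocking everything can also delay
// runtime preemption signals.
func withSignalsBlocked(critical func()) error {
	// Pin to one OS thread so the mask change and its restore apply to the
	// same thread, mirroring lockOSThread()/unlockOSThread() in the hunk above.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Build a "block everything" mask (Sigset_t layout here is linux/amd64).
	var all, old unix.Sigset_t
	for i := range all.Val {
		all.Val[i] = ^uint64(0)
	}

	// Save the current mask and block signals, like sigsave()+sigblock()...
	if err := unix.PthreadSigmask(unix.SIG_BLOCK, &all, &old); err != nil {
		return err
	}
	// ...and restore the saved mask on the way out, like msigrestore(sigmask).
	defer unix.PthreadSigmask(unix.SIG_SETMASK, &old, nil)

	critical()
	return nil
}

func main() {
	_ = withSignalsBlocked(func() {
		// critical section: coordinate per-thread state without this thread
		// being diverted into signal handling
	})
}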
@@ -1686,6 +1705,7 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
 		// the possibility of racing with mp.
 		lock(&mp.mFixup.lock)
 		mp.mFixup.fn = fn
+		atomic.Store(&mp.mFixup.used, 1)
 		if mp.doesPark {
 			// For non-service threads this will
 			// cause the wakeup to be short lived
@@ -1702,9 +1722,7 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
 			if mp.procid == tid {
 				continue
 			}
-			lock(&mp.mFixup.lock)
-			done = done && (mp.mFixup.fn == nil)
-			unlock(&mp.mFixup.lock)
+			done = atomic.Load(&mp.mFixup.used) == 0
 		}
 		if done {
 			break
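These two hunks replace a per-m lock round trip with an atomic "used" flag: the coordinator stores 1 when it hands fn to an m, each m stores 0 once its fixup has run, and the coordinator polls the flags. A rough stand-alone analogue of that handshake in plain Go (none of the runtime types involved) looks like this:

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

type worker struct {
	pending uint32 // analogue of mFixup.used
}

func main() {
	workers := make([]*worker, 4)
	for i := range workers {
		workers[i] = &worker{}
	}

	fn := func(id int) { fmt.Println("worker", id, "ran fn") }

	// Coordinator: publish the work to every worker with an atomic store.
	for i, w := range workers {
		atomic.StoreUint32(&w.pending, 1)
		go func(i int, w *worker) {
			fn(i)
			atomic.StoreUint32(&w.pending, 0) // worker reports completion
		}(i, w)
	}

	// Coordinator: poll with atomic loads until every flag is clear.
	for {
		done := true
		for _, w := range workers {
			done = done && atomic.LoadUint32(&w.pending) == 0
		}
		if done {
			break
		}
		runtime.Gosched() // analogue of osyield()
	}
	fmt.Println("all workers done")
}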
@@ -1731,6 +1749,8 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
 		unlock(&mFixupRace.lock)
 	}
 	startTheWorldGC()
+	msigrestore(sigmask)
+	unlockOSThread()
 }

 // runSafePointFn runs the safe point function, if any, for this P.
@@ -2225,9 +2245,21 @@ var mFixupRace struct {
 // mDoFixup runs any outstanding fixup function for the running m.
 // Returns true if a fixup was outstanding and actually executed.
 //
+// Note: to avoid deadlocks, and the need for the fixup function
+// itself to be async safe, signals are blocked for the working m
+// while it holds the mFixup lock. (See golang.org/issue/44193)
+//
 //go:nosplit
 func mDoFixup() bool {
 	_g_ := getg()
+	if used := atomic.Load(&_g_.m.mFixup.used); used == 0 {
+		return false
+	}
+
+	// slow path - if fixup fn is used, block signals and lock.
+	var sigmask sigset
+	sigsave(&sigmask)
+	sigblock(false)
 	lock(&_g_.m.mFixup.lock)
 	fn := _g_.m.mFixup.fn
 	if fn != nil {
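mDoFixup now has a cheap fast path: an atomic load of mFixup.used lets a signal-interrupted m return immediately when no fixup is pending, and only the slow path blocks signals and takes the lock. A hedged sketch of that fast-path/slow-path shape in ordinary Go follows, with sync.Mutex standing in for the runtime lock; the signal blocking has no user-level equivalent here.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type fixup struct {
	used uint32     // set to 1 while fn is pending
	mu   sync.Mutex // protects fn
	fn   func()
}

// run executes a pending fixup, if any, and reports whether it did.
func (f *fixup) run() bool {
	if atomic.LoadUint32(&f.used) == 0 {
		return false // fast path: nothing pending, no lock taken
	}
	// Slow path: take the lock, consume fn, clear the flag.
	f.mu.Lock()
	defer f.mu.Unlock()
	fn := f.fn
	if fn != nil {
		f.fn = nil
		atomic.StoreUint32(&f.used, 0)
		fn()
	}
	return fn != nil
}

func main() {
	var f fixup
	fmt.Println(f.run()) // false: fast path, no work pending

	f.mu.Lock()
	f.fn = func() { fmt.Println("fixup ran") }
	atomic.StoreUint32(&f.used, 1)
	f.mu.Unlock()

	fmt.Println(f.run()) // prints "fixup ran", then true
}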
@@ -2244,7 +2276,6 @@ func mDoFixup() bool {
 			// is more obviously safe.
 			throw("GC must be disabled to protect validity of fn value")
 		}
-		*(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0
 		if _g_.racectx != 0 || !raceenabled {
 			fn(false)
 		} else {
@@ -2259,11 +2290,24 @@ func mDoFixup() bool {
 			_g_.racectx = 0
 			unlock(&mFixupRace.lock)
 		}
+		*(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0
+		atomic.Store(&_g_.m.mFixup.used, 0)
 	}
 	unlock(&_g_.m.mFixup.lock)
+	msigrestore(sigmask)
 	return fn != nil
 }

+// mDoFixupAndOSYield is called when an m is unable to send a signal
+// because the allThreadsSyscall mechanism is in progress. That is, an
+// mPark() has been interrupted with this signal handler so we need to
+// ensure the fixup is executed from this context.
+//go:nosplit
+func mDoFixupAndOSYield() {
+	mDoFixup()
+	osyield()
+}
+
 // templateThread is a thread in a known-good state that exists solely
 // to start new threads in known-good states when the calling thread
 // may not be in a good state.