mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: resolve checkdead panic by refining startm lock handling in caller context
This change addresses a `checkdead` panic caused by a race condition between `sysmon->startm` and `checkdead` callers, due to prematurely releasing the scheduler lock. The solution involves allowing a `startm` caller to acquire the scheduler lock and call `startm` in this context. A new `lockheld` bool argument is added to `startm`, which manages all lock and unlock calls within the function. The`startIdle` function variable in `injectglist` is updated to call `startm` with the lock held, ensuring proper lock handling in this specific case. This refined lock handling resolves the observed race condition issue. Fixes #59600 Change-Id: I11663a15536c10c773fc2fde291d959099aa71be Reviewed-on: https://go-review.googlesource.com/c/go/+/487316 Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Michael Pratt <mpratt@google.com>
This commit is contained in:
parent
752ef81056
commit
ff059add10
1 changed files with 31 additions and 14 deletions
|
|
@ -2435,10 +2435,15 @@ func mspinning() {
|
|||
// Callers passing a non-nil P must call from a non-preemptible context. See
|
||||
// comment on acquirem below.
|
||||
//
|
||||
// Argument lockheld indicates whether the caller already acquired the
|
||||
// scheduler lock. Callers holding the lock when making the call must pass
|
||||
// true. The lock might be temporarily dropped, but will be reacquired before
|
||||
// returning.
|
||||
//
|
||||
// Must not have write barriers because this may be called without a P.
|
||||
//
|
||||
//go:nowritebarrierrec
|
||||
func startm(pp *p, spinning bool) {
|
||||
func startm(pp *p, spinning, lockheld bool) {
|
||||
// Disable preemption.
|
||||
//
|
||||
// Every owned P must have an owner that will eventually stop it in the
|
||||
|
|
@ -2456,7 +2461,9 @@ func startm(pp *p, spinning bool) {
|
|||
// startm. Callers passing a nil P may be preemptible, so we must
|
||||
// disable preemption before acquiring a P from pidleget below.
|
||||
mp := acquirem()
|
||||
lock(&sched.lock)
|
||||
if !lockheld {
|
||||
lock(&sched.lock)
|
||||
}
|
||||
if pp == nil {
|
||||
if spinning {
|
||||
// TODO(prattmic): All remaining calls to this function
|
||||
|
|
@ -2466,7 +2473,9 @@ func startm(pp *p, spinning bool) {
|
|||
}
|
||||
pp, _ = pidleget(0)
|
||||
if pp == nil {
|
||||
unlock(&sched.lock)
|
||||
if !lockheld {
|
||||
unlock(&sched.lock)
|
||||
}
|
||||
releasem(mp)
|
||||
return
|
||||
}
|
||||
|
|
@ -2480,6 +2489,8 @@ func startm(pp *p, spinning bool) {
|
|||
// could find no idle P while checkdead finds a runnable G but
|
||||
// no running M's because this new M hasn't started yet, thus
|
||||
// throwing in an apparent deadlock.
|
||||
// This apparent deadlock is possible when startm is called
|
||||
// from sysmon, which doesn't count as a running M.
|
||||
//
|
||||
// Avoid this situation by pre-allocating the ID for the new M,
|
||||
// thus marking it as 'running' before we drop sched.lock. This
|
||||
|
|
@ -2494,12 +2505,18 @@ func startm(pp *p, spinning bool) {
|
|||
fn = mspinning
|
||||
}
|
||||
newm(fn, pp, id)
|
||||
|
||||
if lockheld {
|
||||
lock(&sched.lock)
|
||||
}
|
||||
// Ownership transfer of pp committed by start in newm.
|
||||
// Preemption is now safe.
|
||||
releasem(mp)
|
||||
return
|
||||
}
|
||||
unlock(&sched.lock)
|
||||
if !lockheld {
|
||||
unlock(&sched.lock)
|
||||
}
|
||||
if nmp.spinning {
|
||||
throw("startm: m is spinning")
|
||||
}
|
||||
|
|
@ -2528,24 +2545,24 @@ func handoffp(pp *p) {
|
|||
|
||||
// if it has local work, start it straight away
|
||||
if !runqempty(pp) || sched.runqsize != 0 {
|
||||
startm(pp, false)
|
||||
startm(pp, false, false)
|
||||
return
|
||||
}
|
||||
// if there's trace work to do, start it straight away
|
||||
if (trace.enabled || trace.shutdown) && traceReaderAvailable() != nil {
|
||||
startm(pp, false)
|
||||
startm(pp, false, false)
|
||||
return
|
||||
}
|
||||
// if it has GC work, start it straight away
|
||||
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) {
|
||||
startm(pp, false)
|
||||
startm(pp, false, false)
|
||||
return
|
||||
}
|
||||
// no local work, check that there are no spinning/idle M's,
|
||||
// otherwise our help is not required
|
||||
if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
|
||||
sched.needspinning.Store(0)
|
||||
startm(pp, true)
|
||||
startm(pp, true, false)
|
||||
return
|
||||
}
|
||||
lock(&sched.lock)
|
||||
|
|
@ -2567,14 +2584,14 @@ func handoffp(pp *p) {
|
|||
}
|
||||
if sched.runqsize != 0 {
|
||||
unlock(&sched.lock)
|
||||
startm(pp, false)
|
||||
startm(pp, false, false)
|
||||
return
|
||||
}
|
||||
// If this is the last running P and nobody is polling network,
|
||||
// need to wakeup another M to poll network.
|
||||
if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 {
|
||||
unlock(&sched.lock)
|
||||
startm(pp, false)
|
||||
startm(pp, false, false)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -2623,7 +2640,7 @@ func wakep() {
|
|||
// see at least one running M (ours).
|
||||
unlock(&sched.lock)
|
||||
|
||||
startm(pp, true)
|
||||
startm(pp, true, false)
|
||||
|
||||
releasem(mp)
|
||||
}
|
||||
|
|
@ -3376,8 +3393,8 @@ func injectglist(glist *gList) {
|
|||
break
|
||||
}
|
||||
|
||||
startm(pp, false, true)
|
||||
unlock(&sched.lock)
|
||||
startm(pp, false)
|
||||
releasem(mp)
|
||||
}
|
||||
}
|
||||
|
|
@ -5484,7 +5501,7 @@ func sysmon() {
|
|||
// See issue 42515 and
|
||||
// https://gnats.netbsd.org/cgi-bin/query-pr-single.pl?number=50094.
|
||||
if next := timeSleepUntil(); next < now {
|
||||
startm(nil, false)
|
||||
startm(nil, false, false)
|
||||
}
|
||||
}
|
||||
if scavenger.sysmonWake.Load() != 0 {
|
||||
|
|
@ -5756,7 +5773,7 @@ func schedEnableUser(enable bool) {
|
|||
globrunqputbatch(&sched.disable.runnable, n)
|
||||
unlock(&sched.lock)
|
||||
for ; n != 0 && sched.npidle.Load() != 0; n-- {
|
||||
startm(nil, false)
|
||||
startm(nil, false, false)
|
||||
}
|
||||
} else {
|
||||
unlock(&sched.lock)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue