// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import "unsafe"

var (
	m0 m
	g0 g
)

// Goroutine scheduler
// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
//
// The main concepts are:
// G - goroutine.
// M - worker thread, or machine.
// P - processor, a resource that is required to execute Go code.
//     M must have an associated P to execute Go code, however it can be
//     blocked or in a syscall w/o an associated P.
//
// Design doc at https://golang.org/s/go11sched.

const (
	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
	_GoidCacheBatch = 16
)
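
// Illustrative note (not part of the runtime; it uses only the public API):
// the number of Ps corresponds to the user-visible GOMAXPROCS setting, while
// Gs and Ms are only observable indirectly. A minimal sketch from ordinary
// user code:
//
//	ps := runtime.GOMAXPROCS(0)  // query the current P count without changing it
//	gs := runtime.NumGoroutine() // current number of goroutines (Gs)
//	cpus := runtime.NumCPU()     // CPUs visible to the process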

// The bootstrap sequence is:
//
//	call osinit
//	call schedinit
//	make & queue new G
//	call runtime·mstart
//
// The new G calls runtime·main.
func schedinit() {
	// raceinit must be the first call to the race detector.
	// In particular, it must be done before mallocinit below calls racemapshadow.
	_g_ := getg()
	if raceenabled {
		_g_.racectx = raceinit()
	}

	sched.maxmcount = 10000

	// Cache the framepointer experiment. This affects stack unwinding.
	framepointer_enabled = haveexperiment("framepointer")

	tracebackinit()
	moduledataverify()
	stackinit()
	mallocinit()
	mcommoninit(_g_.m)

	goargs()
	goenvs()
	parsedebugvars()
	gcinit()

	sched.lastpoll = uint64(nanotime())
	procs := int(ncpu)
	if n := atoi(gogetenv("GOMAXPROCS")); n > 0 {
		if n > _MaxGomaxprocs {
			n = _MaxGomaxprocs
		}
		procs = n
	}
	if procresize(int32(procs)) != nil {
		throw("unknown runnable goroutine during bootstrap")
	}

	if buildVersion == "" {
		// Condition should never trigger. This code just serves
		// to ensure runtime·buildVersion is kept in the resulting binary.
		buildVersion = "unknown"
	}
}
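
// Illustrative note (not part of the runtime): the GOMAXPROCS environment
// variable consulted in schedinit above is the same knob exposed to users,
// e.g.
//
//	GOMAXPROCS=4 ./myprog
//
// or, equivalently at run time, runtime.GOMAXPROCS(4). As the code above
// shows, the value read at startup is capped at _MaxGomaxprocs.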

func dumpgstatus(gp *g) {
	_g_ := getg()
	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
	print("runtime: g: g=", _g_, ", goid=", _g_.goid, ", g->atomicstatus=", readgstatus(_g_), "\n")
}

func checkmcount() {
	// sched lock is held
	if sched.mcount > sched.maxmcount {
		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
		throw("thread exhaustion")
	}
}

func mcommoninit(mp *m) {
	_g_ := getg()

	// g0 stack won't make sense for user (and is not necessarily unwindable).
	if _g_ != _g_.m.g0 {
		callers(1, mp.createstack[:])
	}

	// Seed the per-M fast random state (must end up non-zero; see the check below).
	mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
	if mp.fastrand == 0 {
		mp.fastrand = 0x49f6428a
	}

	lock(&sched.lock)
	mp.id = sched.mcount
	sched.mcount++
	checkmcount()
	mpreinit(mp)
	if mp.gsignal != nil {
		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
	}

	// Add to allm so garbage collector doesn't free g->m
	// when it is just in a register or thread-local storage.
	mp.alllink = allm

	// NumCgoCall() iterates over allm w/o schedlock,
	// so we need to publish it safely.
	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
	unlock(&sched.lock)
}

// Mark gp ready to run.
func ready(gp *g, traceskip int) {
	if trace.enabled {
		traceGoUnpark(gp, traceskip)
	}

	status := readgstatus(gp)

	// Mark runnable.
	_g_ := getg()
	_g_.m.locks++ // disable preemption because it can be holding p in a local var
	if status&^_Gscan != _Gwaiting {
		dumpgstatus(gp)
		throw("bad g->status in ready")
	}

	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
	casgstatus(gp, _Gwaiting, _Grunnable)
	runqput(_g_.m.p.ptr(), gp, true)
	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { // TODO: fast atomic
		wakep()
	}
	_g_.m.locks--
	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
		_g_.stackguard0 = stackPreempt
	}
}
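
// Illustrative note (not part of the runtime): ready is what ultimately makes
// a parked goroutine runnable again. For example, in user code such as
//
//	ch := make(chan int)
//	go func() { <-ch }() // receiver parks in _Gwaiting
//	ch <- 1              // the send readies the receiver (_Gwaiting -> _Grunnable)
//
// the unblocking side ends up calling ready (via goready) on the blocked
// goroutine's g.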

func gcprocs() int32 {
	// Figure out how many CPUs to use during GC.
	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
	lock(&sched.lock)
	n := gomaxprocs
	if n > ncpu {
		n = ncpu
	}
	if n > _MaxGcproc {
		n = _MaxGcproc
	}
	if n > sched.nmidle+1 { // one M is currently running
		n = sched.nmidle + 1
	}
	unlock(&sched.lock)
	return n
}
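
// Illustrative example (assumed values, not part of the runtime): with
// gomaxprocs = 8, ncpu = 4, _MaxGcproc = 32 and five idle Ms, gcprocs
// returns min(8, 4, 32, 5+1) = 4.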

func needaddgcproc() bool {
	lock(&sched.lock)
	n := gomaxprocs
	if n > ncpu {
		n = ncpu
	}
	if n > _MaxGcproc {
		n = _MaxGcproc
	}
	n -= sched.nmidle + 1 // one M is currently running
	unlock(&sched.lock)
	return n > 0
}

func helpgc(nproc int32) {
	_g_ := getg()
	lock(&sched.lock)
	pos := 0
	for n := int32(1); n < nproc; n++ { // one M is currently running
		if allp[pos].mcache == _g_.m.mcache {
			// Skip the P that the current M is using (identified by its mcache).
			pos++
		}
		mp := mget()
		if mp == nil {
			throw("gcprocs inconsistency")
		}
		mp.helpgc = n
		mp.p.set(allp[pos])
		mp.mcache = allp[pos].mcache
		pos++
		notewakeup(&mp.park)
	}
	unlock(&sched.lock)
}

// freezeStopWait is a large value that freezetheworld sets
// sched.stopwait to in order to request that all Gs permanently stop.
const freezeStopWait = 0x7fffffff

// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
func freezetheworld() {
	// stopwait and preemption requests can be lost
	// due to races with concurrently executing threads,
	// so try several times
	for i := 0; i < 5; i++ {
		// this should tell the scheduler to not start any new goroutines
		sched.stopwait = freezeStopWait
		atomicstore(&sched.gcwaiting, 1)
		// this should stop running goroutines
		if !preemptall() {
			break // no running goroutines
		}
		usleep(1000)
	}
	// to be sure
	usleep(1000)
	preemptall()
	usleep(1000)
}

func isscanstatus(status uint32) bool {
	if status == _Gscan {
		throw("isscanstatus: Bad status Gscan")
	}
	return status&_Gscan == _Gscan
}
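
// Illustrative note on the status encoding (a property of the existing _G*
// constants, not new behavior): each _Gscan* value is the corresponding base
// status with the _Gscan bit set, e.g. _Gscanrunnable == _Grunnable|_Gscan.
// That is why status&^_Gscan recovers the base status and status|_Gscan forms
// the locked (scan) status in the helpers below.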

// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus, casfrom_Gscanstatus.
//go:nosplit
func readgstatus(gp *g) uint32 {
	return atomicload(&gp.atomicstatus)
}

// Ownership of gcscanvalid:
//
// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
// then gp owns gp.gcscanvalid, and other goroutines must not modify it.
//
// Otherwise, a second goroutine can lock the scan state by setting _Gscan
// in the status bit and then modify gcscanvalid, and then unlock the scan state.
//
// Note that the first condition implies an exception to the second:
// if a second goroutine changes gp's status to _Grunning|_Gscan,
// that second goroutine still does not have the right to modify gcscanvalid.

// The Gscanstatuses are acting like locks and this releases them.
// If it proves to be a performance hit we should be able to make these
// simple atomic stores but for now we are going to throw if
// we see an inconsistent state.
func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
	success := false

	// Check that transition is valid.
	switch oldval {
	default:
		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
	case _Gscanrunnable,
		_Gscanwaiting,
		_Gscanrunning,
		_Gscansyscall:
		if newval == oldval&^_Gscan {
			success = cas(&gp.atomicstatus, oldval, newval)
		}
	case _Gscanenqueue:
		if newval == _Gwaiting {
			success = cas(&gp.atomicstatus, oldval, newval)
		}
	}
	if !success {
		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus: gp->status is not in scan state")
	}
	if newval == _Grunning {
		gp.gcscanvalid = false
	}
}

// This will return false if the gp is not in the expected status and the cas fails.
// This acts like a lock acquire while the casfrom_Gscanstatus acts like a lock release.
func castogscanstatus(gp *g, oldval, newval uint32) bool {
	switch oldval {
	case _Grunnable,
		_Gwaiting,
		_Gsyscall:
		if newval == oldval|_Gscan {
			return cas(&gp.atomicstatus, oldval, newval)
		}
	case _Grunning:
		if newval == _Gscanrunning || newval == _Gscanenqueue {
			return cas(&gp.atomicstatus, oldval, newval)
		}
	}
	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
	throw("castogscanstatus")
	panic("not reached")
}

// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
// and casfrom_Gscanstatus instead.
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
// put it in the Gscan state is finished.
//go:nosplit
func casgstatus(gp *g, oldval, newval uint32) {
	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
		systemstack(func() {
			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
			throw("casgstatus: bad incoming values")
		})
	}

	if oldval == _Grunning && gp.gcscanvalid {
		// If oldval == _Grunning, then the actual status must be
		// _Grunning or _Grunning|_Gscan; either way,
		// we own gp.gcscanvalid, so it's safe to read.
		// gp.gcscanvalid must not be true when we are running.
		print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
		throw("casgstatus")
	}

	// loop if gp->atomicstatus is in a scan state giving
	// GC time to finish and change the state to oldval.
	for !cas(&gp.atomicstatus, oldval, newval) {
		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
			systemstack(func() {
				throw("casgstatus: waiting for Gwaiting but is Grunnable")
			})
		}
		// Help GC if needed.
		// if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
		// 	gp.preemptscan = false
		// 	systemstack(func() {
		// 		gcphasework(gp)
		// 	})
		// }
	}
	if newval == _Grunning {
		gp.gcscanvalid = false
	}
}

// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
// Returns old status. Cannot call casgstatus directly, because we are racing with an
// async wakeup that might come in from netpoll. If we see Gwaiting from the readgstatus,
// it might have become Grunnable by the time we get to the cas. If we called casgstatus,
// it would loop waiting for the status to go back to Gwaiting, which it never will.
//go:nosplit
func casgcopystack(gp *g) uint32 {
	for {
		oldstatus := readgstatus(gp) &^ _Gscan
		if oldstatus != _Gwaiting && oldstatus != _Grunnable {
			throw("copystack: bad status, not Gwaiting or Grunnable")
		}
		if cas(&gp.atomicstatus, oldstatus, _Gcopystack) {
			return oldstatus
		}
	}
}

// scang blocks until gp's stack has been scanned.
// It might be scanned by scang or it might be scanned by the goroutine itself.
// Either way, the stack scan has completed when scang returns.
func scang(gp *g) {
	// Invariant: we (the caller, markroot for a specific goroutine) own gp.gcscandone.
	// Nothing is racing with us now, but gcscandone might be set to true left over
	// from an earlier round of stack scanning (we scan twice per GC).
	// We use gcscandone to record whether the scan has been done during this round.
	// It is important that the scan happens exactly once: if called twice,
	// the installation of stack barriers will detect the double scan and die.

	gp.gcscandone = false

	// Endeavor to get gcscandone set to true,
	// either by doing the stack scan ourselves or by coercing gp to scan itself.
	// gp.gcscandone can transition from false to true when we're not looking
	// (if we asked for preemption), so any time we lock the status using
	// castogscanstatus we have to double-check that the scan is still not done.
	for !gp.gcscandone {
		switch s := readgstatus(gp); s {
		default:
			dumpgstatus(gp)
			throw("stopg: invalid status")

		case _Gdead:
			// No stack.
			gp.gcscandone = true

		case _Gcopystack:
			// Stack being switched. Go around again.

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// Racing with execution or readying of gp.
			// The scan bit keeps them from running
			// the goroutine until we're done.
			if castogscanstatus(gp, s, s|_Gscan) {
				if !gp.gcscandone {
					// Coordinate with traceback
					// in sigprof.
					for !cas(&gp.stackLock, 0, 1) {
						osyield()
					}
					scanstack(gp)
					atomicstore(&gp.stackLock, 0)
					gp.gcscandone = true
				}
				restartg(gp)
			}

		case _Gscanwaiting:
			// newstack is doing a scan for us right now. Wait.

		case _Grunning:
			// Goroutine running. Try to preempt execution so it can scan itself.
			// The preemption handler (in newstack) does the actual scan.

			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptscan && gp.preempt && gp.stackguard0 == stackPreempt {
				break
			}

			// Ask for preemption and self scan.
			if castogscanstatus(gp, _Grunning, _Gscanrunning) {
				if !gp.gcscandone {
					gp.preemptscan = true
					gp.preempt = true
					gp.stackguard0 = stackPreempt
				}
				casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
			}
		}
	}

	gp.preemptscan = false // cancel scan request if no longer needed
}

// The GC requests that this routine be moved from a scanmumble state to a mumble state.
func restartg(gp *g) {
	s := readgstatus(gp)
	switch s {
	default:
		dumpgstatus(gp)
		throw("restartg: unexpected status")

	case _Gdead:
		// ok

	case _Gscanrunnable,
		_Gscanwaiting,
		_Gscansyscall:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)

	// Scan is now completed.
	// Goroutine now needs to be made runnable.
	// We put it on the global run queue; ready blocks on the global scheduler lock.
	case _Gscanenqueue:
		casfrom_Gscanstatus(gp, _Gscanenqueue, _Gwaiting)
		if gp != getg().m.curg {
			throw("processing Gscanenqueue on wrong m")
		}
		dropg()
		ready(gp, 0)
	}
}

// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and records reason as the reason
// for the stop. On return, only the current goroutine's P is running.
// stopTheWorld must not be called from a system stack and the caller
// must not hold worldsema. The caller must call startTheWorld when
// other P's should resume execution.
//
// stopTheWorld is safe for multiple goroutines to call at the
// same time. Each will execute its own stop, and the stops will
// be serialized.
//
// This is also used by routines that do stack dumps. If the system is
// in panic or being exited, this may not reliably stop all
// goroutines.
func stopTheWorld(reason string) {
	semacquire(&worldsema, false)
	getg().m.preemptoff = reason
	systemstack(stopTheWorldWithSema)
}

// startTheWorld undoes the effects of stopTheWorld.
func startTheWorld() {
	systemstack(startTheWorldWithSema)
	// worldsema must be held over startTheWorldWithSema to ensure
	// gomaxprocs cannot change while worldsema is held.
	semrelease(&worldsema)
	getg().m.preemptoff = ""
}
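
// Illustrative note (not part of the runtime): from user code the pause is
// observable only indirectly, through APIs that need a globally consistent
// view, for example
//
//	var ms runtime.MemStats
//	runtime.ReadMemStats(&ms) // briefly stops the world for a consistent snapshot
//
// Whether a particular API stops the world is an implementation detail and
// may change between releases.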

// Holding worldsema grants an M the right to try to stop the world
// and prevents gomaxprocs from changing concurrently.
var worldsema uint32 = 1

// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should stopTheWorldWithSema on the system
// stack:
//
//	semacquire(&worldsema, false)
//	m.preemptoff = "reason"
//	systemstack(stopTheWorldWithSema)
//
// When finished, the caller must either call startTheWorld or undo
// these three operations separately:
//
//	m.preemptoff = ""
//	systemstack(startTheWorldWithSema)
//	semrelease(&worldsema)
//
// It is allowed to acquire worldsema once and then execute multiple
// startTheWorldWithSema/stopTheWorldWithSema pairs.
// Other P's are able to execute between successive calls to
// startTheWorldWithSema and stopTheWorldWithSema.
// Holding worldsema causes any other goroutines invoking
// stopTheWorld to block.
func stopTheWorldWithSema() {
	_g_ := getg()

	// If we hold a lock, then we won't be able to stop another M
	// that is blocked trying to acquire the lock.
	if _g_.m.locks > 0 {
		throw("stopTheWorld: holding locks")
	}

	lock(&sched.lock)
	sched.stopwait = gomaxprocs
	atomicstore(&sched.gcwaiting, 1)
	preemptall()
	// stop current P
	_g_.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
	sched.stopwait--
	// try to retake all P's in Psyscall status
	for i := 0; i < int(gomaxprocs); i++ {
		p := allp[i]
		s := p.status
		if s == _Psyscall && cas(&p.status, s, _Pgcstop) {
			if trace.enabled {
				traceGoSysBlock(p)
				traceProcStop(p)
			}
			p.syscalltick++
			sched.stopwait--
		}
	}
	// stop idle P's
	for {
		p := pidleget()
		if p == nil {
			break
		}
		p.status = _Pgcstop
		sched.stopwait--
	}
	wait := sched.stopwait > 0
	unlock(&sched.lock)

	// wait for remaining P's to stop voluntarily
	if wait {
		for {
			// wait for 100us, then try to re-preempt in case of any races
			if notetsleep(&sched.stopnote, 100*1000) {
				noteclear(&sched.stopnote)
				break
			}
			preemptall()
		}
	}
	if sched.stopwait != 0 {
		throw("stopTheWorld: not stopped")
	}
	for i := 0; i < int(gomaxprocs); i++ {
		p := allp[i]
		if p.status != _Pgcstop {
			throw("stopTheWorld: not stopped")
		}
	}
}

func mhelpgc() {
	_g_ := getg()
	_g_.m.helpgc = -1
}

func startTheWorldWithSema() {
	_g_ := getg()

	_g_.m.locks++        // disable preemption because it can be holding p in a local var
	gp := netpoll(false) // non-blocking
	injectglist(gp)
	add := needaddgcproc()
	lock(&sched.lock)

	procs := gomaxprocs
	if newprocs != 0 {
		procs = newprocs
		newprocs = 0
	}
	p1 := procresize(procs)
	sched.gcwaiting = 0
	if sched.sysmonwait != 0 {
		sched.sysmonwait = 0
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)

	for p1 != nil {
		p := p1
		p1 = p1.link.ptr()
		if p.m != 0 {
			mp := p.m.ptr()
			p.m = 0
			if mp.nextp != 0 {
				throw("startTheWorld: inconsistent mp->nextp")
			}
			mp.nextp.set(p)
			notewakeup(&mp.park)
		} else {
			// Start M to run P. Do not start another M below.
			newm(nil, p)
			add = false
		}
	}

	// Wake up an additional proc in case we have excessive runnable goroutines
	// in local queues or in the global queue. If we don't, the proc will park itself.
	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 {
		wakep()
	}

	if add {
		// If GC could have used another helper proc, start one now,
		// in the hope that it will be available next time.
		// It would have been even better to start it before the collection,
		// but doing so requires allocating memory, so it's tricky to
		// coordinate. This lazy approach works out in practice:
		// we don't mind if the first couple gc rounds don't have quite
		// the maximum number of procs.
		newm(mhelpgc, nil)
	}
	_g_.m.locks--
	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
		_g_.stackguard0 = stackPreempt
	}
}

// Called to start an M.
//go:nosplit
func mstart() {
	_g_ := getg()

	if _g_.stack.lo == 0 {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		size := _g_.stack.hi
		if size == 0 {
			size = 8192 * stackGuardMultiplier
		}
		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		_g_.stack.lo = _g_.stack.hi - size + 1024
	}
	// Initialize stack guards so that we can start calling
	// both Go and C functions with stack growth prologues.
	_g_.stackguard0 = _g_.stack.lo + _StackGuard
	_g_.stackguard1 = _g_.stackguard0
	mstart1()
}

func mstart1() {
	_g_ := getg()

	if _g_ != _g_.m.g0 {
		throw("bad runtime·mstart")
	}

	// Record top of stack for use by mcall.
	// Once we call schedule we're never coming back,
	// so other calls can reuse this stack space.
	gosave(&_g_.m.g0.sched)
	_g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used
	asminit()
	minit()

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if _g_.m == &m0 {
		// Create an extra M for callbacks on threads not created by Go.
		if iscgo && !cgoHasExtraM {
			cgoHasExtraM = true
			newextram()
		}
		initsig()
	}

	if fn := _g_.m.mstartfn; fn != nil {
		fn()
	}

	if _g_.m.helpgc != 0 {
		_g_.m.helpgc = 0
		stopm()
	} else if _g_.m != &m0 {
		acquirep(_g_.m.nextp.ptr())
		_g_.m.nextp = 0
	}
	schedule()
}

// forEachP calls fn(p) for every P p when p reaches a GC safe point.
// If a P is currently executing code, this will bring the P to a GC
// safe point and execute fn on that P. If the P is not executing code
// (it is idle or in a syscall), this will call fn(p) directly while
// preventing the P from exiting its state. This does not ensure that
// fn will run on every CPU executing Go code, but it acts as a global
// memory barrier. GC uses this as a "ragged barrier."
//
// The caller must hold worldsema.
func forEachP(fn func(*p)) {
	mp := acquirem()
	_p_ := getg().m.p.ptr()

	lock(&sched.lock)
	if sched.safePointWait != 0 {
		throw("forEachP: sched.safePointWait != 0")
	}
	sched.safePointWait = gomaxprocs - 1
	sched.safePointFn = fn

	// Ask all Ps to run the safe point function.
	for _, p := range allp[:gomaxprocs] {
		if p != _p_ {
			atomicstore(&p.runSafePointFn, 1)
		}
	}
	preemptall()

	// Any P entering _Pidle or _Psyscall from now on will observe
	// p.runSafePointFn == 1 and will call runSafePointFn when
	// changing its status to _Pidle/_Psyscall.

	// Run safe point function for all idle Ps. sched.pidle will
	// not change because we hold sched.lock.
	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
		if cas(&p.runSafePointFn, 1, 0) {
			fn(p)
			sched.safePointWait--
		}
	}

	wait := sched.safePointWait > 0
	unlock(&sched.lock)

	// Run fn for the current P.
	fn(_p_)

	// Force Ps currently in _Psyscall into _Pidle and hand them
	// off to induce safe point function execution.
	for i := 0; i < int(gomaxprocs); i++ {
		p := allp[i]
		s := p.status
		if s == _Psyscall && p.runSafePointFn == 1 && cas(&p.status, s, _Pidle) {
			if trace.enabled {
				traceGoSysBlock(p)
				traceProcStop(p)
			}
			p.syscalltick++
			handoffp(p)
		}
	}

	// Wait for remaining Ps to run fn.
	if wait {
		for {
			// Wait for 100us, then try to re-preempt in
			// case of any races.
			if notetsleep(&sched.safePointNote, 100*1000) {
				noteclear(&sched.safePointNote)
				break
			}
			preemptall()
		}
	}
runtime: use separate count and note for forEachP
2015-05-15 16:31:17 -04:00
|
|
|
if sched.safePointWait != 0 {
|
|
|
|
|
throw("forEachP: not done")
|
2015-03-27 16:49:12 -04:00
|
|
|
}
|
|
|
|
|
for i := 0; i < int(gomaxprocs); i++ {
|
|
|
|
|
p := allp[i]
|
|
|
|
|
if p.runSafePointFn != 0 {
|
|
|
|
|
throw("forEachP: P did not run fn")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
sched.safePointFn = nil
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
releasem(mp)
|
|
|
|
|
}
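The commit message above describes the fix applied in forEachP: a dedicated safePointWait counter and safePointNote instead of the shared stopwait/stopnote. A rough standalone model of that count-and-note handshake, using sync/atomic and a channel in place of the runtime's note type (all names here are illustrative, not the runtime's):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// safePoint models the dedicated wait count and note: a counter of Ps
// that still have to run the safe-point function, and a note to sleep
// on until that counter reaches zero.
type safePoint struct {
	wait int32         // plays the role of sched.safePointWait
	note chan struct{} // plays the role of sched.safePointNote
}

// runOnAll asks each simulated P to run fn and blocks until all have.
func (s *safePoint) runOnAll(ps int, fn func(p int)) {
	atomic.StoreInt32(&s.wait, int32(ps))
	s.note = make(chan struct{})
	var wg sync.WaitGroup
	for p := 0; p < ps; p++ {
		wg.Add(1)
		go func(p int) {
			defer wg.Done()
			fn(p)
			// P side of the handshake: decrement the dedicated counter
			// and wake the waiter once it hits zero.
			if atomic.AddInt32(&s.wait, -1) == 0 {
				close(s.note)
			}
		}(p)
	}
	<-s.note // sleep on the dedicated note, never on stopnote
	wg.Wait()
}

func main() {
	var s safePoint
	s.runOnAll(4, func(p int) { fmt.Println("safe-point fn ran on P", p) })
}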
|
|
|
|
|
|
|
|
|
|
// runSafePointFn runs the safe point function, if any, for this P.
|
|
|
|
|
// This should be called like
|
|
|
|
|
//
|
|
|
|
|
// if getg().m.p.runSafePointFn != 0 {
|
|
|
|
|
// runSafePointFn()
|
|
|
|
|
// }
|
|
|
|
|
//
|
|
|
|
|
// runSafePointFn must be checked on any transition in to _Pidle or
|
|
|
|
|
// _Psyscall to avoid a race where forEachP sees that the P is running
|
|
|
|
|
// just before the P goes into _Pidle/_Psyscall and neither forEachP
|
|
|
|
|
// nor the P run the safe-point function.
|
|
|
|
|
func runSafePointFn() {
|
|
|
|
|
p := getg().m.p.ptr()
|
|
|
|
|
// Resolve the race between forEachP running the safe-point
|
|
|
|
|
// function on this P's behalf and this P running the
|
|
|
|
|
// safe-point function directly.
|
|
|
|
|
if !cas(&p.runSafePointFn, 1, 0) {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
sched.safePointFn(p)
|
|
|
|
|
lock(&sched.lock)
|
runtime: use separate count and note for forEachP
2015-05-15 16:31:17 -04:00
|
|
|
sched.safePointWait--
|
|
|
|
|
if sched.safePointWait == 0 {
|
|
|
|
|
notewakeup(&sched.safePointNote)
|
2015-03-27 16:49:12 -04:00
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
}
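The cas(&p.runSafePointFn, 1, 0) at the top of runSafePointFn is a claim-once step: either this P runs the safe-point function itself or forEachP runs it on the P's behalf, but never both. A minimal standalone illustration of that claim-once pattern (names are illustrative, not the runtime's):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var pending int32 = 1 // models p.runSafePointFn
	var ran int32

	run := func(who string) {
		// Whichever side wins the CAS runs the function; the loser sees
		// 0 and returns, so the function runs exactly once.
		if atomic.CompareAndSwapInt32(&pending, 1, 0) {
			atomic.AddInt32(&ran, 1)
			fmt.Println(who, "ran the safe-point function")
		}
	}

	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); run("the P itself") }()
	go func() { defer wg.Done(); run("forEachP on the P's behalf") }()
	wg.Wait()
	fmt.Println("times run:", atomic.LoadInt32(&ran)) // always 1
}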
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// When running with cgo, we call _cgo_thread_start
|
|
|
|
|
// to start threads for us so that we can play nicely with
|
|
|
|
|
// foreign code.
|
|
|
|
|
var cgoThreadStart unsafe.Pointer
|
|
|
|
|
|
|
|
|
|
type cgothreadstart struct {
|
2015-04-17 00:21:30 -04:00
|
|
|
g guintptr
|
2014-11-11 17:08:33 -05:00
|
|
|
tls *uint64
|
|
|
|
|
fn unsafe.Pointer
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Allocate a new m unassociated with any thread.
|
|
|
|
|
// Can use p for allocation context if needed.
|
2015-04-17 00:21:30 -04:00
|
|
|
// fn is recorded as the new m's m.mstartfn.
|
|
|
|
|
func allocm(_p_ *p, fn func()) *m {
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_ := getg()
|
|
|
|
|
_g_.m.locks++ // disable GC because it can be called from sysmon
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p == 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
acquirep(_p_) // temporarily borrow p for mallocs in this function
|
|
|
|
|
}
|
2015-02-03 11:20:58 +03:00
|
|
|
mp := new(m)
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.mstartfn = fn
|
2014-11-11 17:08:33 -05:00
|
|
|
mcommoninit(mp)
|
|
|
|
|
|
|
|
|
|
// In case of cgo or Solaris, pthread_create will make us a stack.
|
|
|
|
|
// Windows and Plan 9 will lay out sched stack on OS stack.
|
|
|
|
|
if iscgo || GOOS == "solaris" || GOOS == "windows" || GOOS == "plan9" {
|
|
|
|
|
mp.g0 = malg(-1)
|
|
|
|
|
} else {
|
2015-04-30 16:57:23 -07:00
|
|
|
mp.g0 = malg(8192 * stackGuardMultiplier)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
mp.g0.m = mp
|
|
|
|
|
|
2015-04-17 00:21:30 -04:00
|
|
|
if _p_ == _g_.m.p.ptr() {
|
2014-11-11 17:08:33 -05:00
|
|
|
releasep()
|
|
|
|
|
}
|
|
|
|
|
_g_.m.locks--
|
|
|
|
|
if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = stackPreempt
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return mp
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// needm is called when a cgo callback happens on a
|
|
|
|
|
// thread without an m (a thread not created by Go).
|
|
|
|
|
// In this case, needm is expected to find an m to use
|
|
|
|
|
// and return with m, g initialized correctly.
|
|
|
|
|
// Since m and g are not set now (likely nil, but see below)
|
|
|
|
|
// needm is limited in what routines it can call. In particular
|
|
|
|
|
// it can only call nosplit functions (textflag 7) and cannot
|
|
|
|
|
// do any scheduling that requires an m.
|
|
|
|
|
//
|
|
|
|
|
// In order to avoid needing heavy lifting here, we adopt
|
|
|
|
|
// the following strategy: there is a stack of available m's
|
|
|
|
|
// that can be stolen. Using compare-and-swap
|
|
|
|
|
// to pop from the stack has ABA races, so we simulate
|
|
|
|
|
// a lock by doing an exchange (via casp) to steal the stack
|
|
|
|
|
// head and replace the top pointer with MLOCKED (1).
|
|
|
|
|
// This serves as a simple spin lock that we can use even
|
|
|
|
|
// without an m. The thread that locks the stack in this way
|
|
|
|
|
// unlocks the stack by storing a valid stack head pointer.
|
|
|
|
|
//
|
|
|
|
|
// In order to make sure that there is always an m structure
|
|
|
|
|
// available to be stolen, we maintain the invariant that there
|
|
|
|
|
// is always one more than needed. At the beginning of the
|
|
|
|
|
// program (if cgo is in use) the list is seeded with a single m.
|
|
|
|
|
// If needm finds that it has taken the last m off the list, its job
|
|
|
|
|
// is - once it has installed its own m so that it can do things like
|
|
|
|
|
// allocate memory - to create a spare m and put it on the list.
|
|
|
|
|
//
|
|
|
|
|
// Each of these extra m's also has a g0 and a curg that are
|
|
|
|
|
// pressed into service as the scheduling stack and current
|
|
|
|
|
// goroutine for the duration of the cgo callback.
|
|
|
|
|
//
|
|
|
|
|
// When the callback is done with the m, it calls dropm to
|
|
|
|
|
// put the m back on the list.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func needm(x byte) {
|
2015-03-25 07:10:45 -04:00
|
|
|
if iscgo && !cgoHasExtraM {
|
2014-11-11 17:08:33 -05:00
|
|
|
// Can happen if C/C++ code calls Go from a global ctor.
|
|
|
|
|
// Can not throw, because scheduler is not initialized yet.
|
2014-12-09 12:31:11 -05:00
|
|
|
write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
|
2014-11-11 17:08:33 -05:00
|
|
|
exit(1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Lock extra list, take head, unlock popped list.
|
|
|
|
|
// nilokay=false is safe here because of the invariant above,
|
|
|
|
|
// that the extra list always contains or will soon contain
|
|
|
|
|
// at least one m.
|
|
|
|
|
mp := lockextra(false)
|
|
|
|
|
|
|
|
|
|
// Set needextram when we've just emptied the list,
|
|
|
|
|
// so that the eventual call into cgocallbackg will
|
|
|
|
|
// allocate a new m for the extra list. We delay the
|
|
|
|
|
// allocation until then so that it can be done
|
|
|
|
|
// after exitsyscall makes sure it is okay to be
|
|
|
|
|
// running at all (that is, there's no garbage collection
|
|
|
|
|
// running right now).
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.needextram = mp.schedlink == 0
|
|
|
|
|
unlockextra(mp.schedlink.ptr())
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
// Install g (= m->g0) and set the stack bounds
|
|
|
|
|
// to match the current stack. We don't actually know
|
|
|
|
|
// how big the stack is, like we don't know how big any
|
|
|
|
|
// scheduling stack is, but we assume there's at least 32 kB,
|
|
|
|
|
// which is more than enough for us.
|
|
|
|
|
setg(mp.g0)
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024
|
|
|
|
|
_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = _g_.stack.lo + _StackGuard
|
2014-11-11 17:08:33 -05:00
|
|
|
|
runtime: don't always unblock all signals
Ian proposed an improved way of handling signals masks in Go, motivated
by a problem where the Android java runtime expects certain signals to
be blocked for all JVM threads. Discussion here
https://groups.google.com/forum/#!topic/golang-dev/_TSCkQHJt6g
Ian's text is used in the following:
A Go program always needs to have the synchronous signals enabled.
These are the signals for which _SigPanic is set in sigtable, namely
SIGSEGV, SIGBUS, SIGFPE.
A Go program that uses the os/signal package, and calls signal.Notify,
needs to have at least one thread which is not blocking that signal,
but it doesn't matter much which one.
Unix programs do not change signal mask across execve. They inherit
signal masks across fork. The shell uses this fact to some extent;
for example, the job control signals (SIGTTIN, SIGTTOU, SIGTSTP) are
blocked for commands run due to backquote quoting or $().
Our current position on signal masks was not thought out. We wandered
into it step by step, e.g., http://golang.org/cl/7323067.
This CL does the following:
Introduce a new platform hook, msigsave, that saves the signal mask of
the current thread to m.sigsave.
Call msigsave from needm and newm.
In minit, set up the signal mask from m.sigsave and unblock the
essential synchronous signals, and SIGILL, SIGTRAP, SIGPROF, SIGSTKFLT
(for systems that have it).
In unminit, restore the signal mask from m.sigsave.
The first time that os/signal.Notify is called, start a new thread whose
only purpose is to update its signal mask to make sure signals for
signal.Notify are unblocked on at least one thread.
The effect on Go programs will be that if they are invoked with some
non-synchronous signals blocked, those signals will normally be
ignored. Previously, those signals would mostly be ignored. A change
in behaviour will occur for programs started with any of these signals
blocked, if they receive the signal: SIGHUP, SIGINT, SIGQUIT, SIGABRT,
SIGTERM. Previously those signals would always cause a crash (unless
using the os/signal package); with this change, they will be ignored
if the program is started with the signal blocked (and does not use
the os/signal package).
./all.bash completes successfully on linux/amd64.
OpenBSD is missing the implementation.
Change-Id: I188098ba7eb85eae4c14861269cc466f2aa40e8c
Reviewed-on: https://go-review.googlesource.com/10173
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2015-05-18 11:00:24 +02:00
|
|
|
msigsave(mp)
|
2014-11-11 17:08:33 -05:00
|
|
|
// Initialize this thread to use the m.
|
|
|
|
|
asminit()
|
|
|
|
|
minit()
|
|
|
|
|
}
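The msigsave call in needm ties into the scheme from the "runtime: don't always unblock all signals" change quoted above: record the thread's incoming signal mask, unblock only what Go requires while Go code runs, and let unminit restore the saved mask afterwards. A toy model of that save/unblock/restore cycle over a plain bitmask (sigmask, thread, and the constants below are illustrative stand-ins, not the runtime's sigset handling):

package main

import "fmt"

type sigmask uint64 // toy stand-in for the platform signal set

const (
	sigSEGV sigmask = 1 << iota
	sigBUS
	sigFPE
	sigUSR1
)

// essential are the synchronous signals Go always needs unblocked.
const essential = sigSEGV | sigBUS | sigFPE

type thread struct {
	blocked sigmask // current mask
	saved   sigmask // what msigsave recorded (m.sigsave)
}

func (t *thread) msigsave() { t.saved = t.blocked }
func (t *thread) minit()    { t.blocked &^= essential } // unblock the essentials only
func (t *thread) unminit()  { t.blocked = t.saved }     // restore the caller's mask

func main() {
	// A foreign thread arrives with SIGUSR1 and SIGSEGV blocked.
	t := thread{blocked: sigUSR1 | sigSEGV}
	t.msigsave()
	t.minit()
	fmt.Printf("while running Go code: blocked=%04b\n", t.blocked) // SIGUSR1 stays blocked
	t.unminit()
	fmt.Printf("after returning to C:  blocked=%04b\n", t.blocked) // original mask restored
}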
|
|
|
|
|
|
2014-12-09 12:31:11 -05:00
|
|
|
var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// newextram allocates an m and puts it on the extra list.
|
|
|
|
|
// It is called with a working local m, so that it can do things
|
|
|
|
|
// like call schedlock and allocate.
|
|
|
|
|
func newextram() {
|
|
|
|
|
// Create extra goroutine locked to extra m.
|
|
|
|
|
// The goroutine is the context in which the cgo callback will run.
|
|
|
|
|
// The sched.pc will never be returned to, but setting it to
|
|
|
|
|
// goexit makes clear to the traceback routines where
|
|
|
|
|
// the goroutine stack ends.
|
2015-04-17 00:21:30 -04:00
|
|
|
mp := allocm(nil, nil)
|
2014-11-11 17:08:33 -05:00
|
|
|
gp := malg(4096)
|
|
|
|
|
gp.sched.pc = funcPC(goexit) + _PCQuantum
|
|
|
|
|
gp.sched.sp = gp.stack.hi
|
|
|
|
|
gp.sched.sp -= 4 * regSize // extra space in case of reads slightly beyond frame
|
|
|
|
|
gp.sched.lr = 0
|
2014-12-22 22:43:49 -05:00
|
|
|
gp.sched.g = guintptr(unsafe.Pointer(gp))
|
2014-11-11 17:08:33 -05:00
|
|
|
gp.syscallpc = gp.sched.pc
|
|
|
|
|
gp.syscallsp = gp.sched.sp
|
|
|
|
|
// malg returns status as Gidle, change to Gsyscall before adding to allg
|
|
|
|
|
// where GC will see it.
|
|
|
|
|
casgstatus(gp, _Gidle, _Gsyscall)
|
|
|
|
|
gp.m = mp
|
|
|
|
|
mp.curg = gp
|
|
|
|
|
mp.locked = _LockInternal
|
|
|
|
|
mp.lockedg = gp
|
|
|
|
|
gp.lockedm = mp
|
|
|
|
|
gp.goid = int64(xadd64(&sched.goidgen, 1))
|
|
|
|
|
if raceenabled {
|
|
|
|
|
gp.racectx = racegostart(funcPC(newextram))
|
|
|
|
|
}
|
|
|
|
|
// put on allg for garbage collector
|
|
|
|
|
allgadd(gp)
|
|
|
|
|
|
|
|
|
|
// Add m to the extra list.
|
|
|
|
|
mnext := lockextra(true)
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.schedlink.set(mnext)
|
2014-11-11 17:08:33 -05:00
|
|
|
unlockextra(mp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dropm is called when a cgo callback has called needm but is now
|
|
|
|
|
// done with the callback and returning back into the non-Go thread.
|
|
|
|
|
// It puts the current m back onto the extra list.
|
|
|
|
|
//
|
|
|
|
|
// The main expense here is the call to signalstack to release the
|
|
|
|
|
// m's signal stack, and then the call to needm on the next callback
|
|
|
|
|
// from this thread. It is tempting to try to save the m for next time,
|
|
|
|
|
// which would eliminate both these costs, but there might not be
|
|
|
|
|
// a next time: the current thread (which Go does not control) might exit.
|
|
|
|
|
// If we saved the m for that thread, there would be an m leak each time
|
|
|
|
|
// such a thread exited. Instead, we acquire and release an m on each
|
|
|
|
|
// call. These should typically not be scheduling operations, just a few
|
|
|
|
|
// atomics, so the cost should be small.
|
|
|
|
|
//
|
|
|
|
|
// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
|
|
|
|
|
// variable using pthread_key_create. Unlike the pthread keys we already use
|
|
|
|
|
// on OS X, this dummy key would never be read by Go code. It would exist
|
|
|
|
|
// only so that we could register a thread-exit-time destructor.
|
|
|
|
|
// That destructor would put the m back onto the extra list.
|
|
|
|
|
// This is purely a performance optimization. The current version,
|
|
|
|
|
// in which dropm happens on each cgo call, is still correct too.
|
|
|
|
|
// We may have to keep the current version on systems with cgo
|
|
|
|
|
// but without pthreads, like Windows.
|
|
|
|
|
func dropm() {
|
|
|
|
|
// Undo whatever initialization minit did during needm.
|
|
|
|
|
unminit()
|
|
|
|
|
|
|
|
|
|
// Clear m and g, and return m to the extra list.
|
2015-01-06 13:56:21 -05:00
|
|
|
// After the call to setg we can only call nosplit functions
|
|
|
|
|
// with no pointer manipulation.
|
2014-11-11 17:08:33 -05:00
|
|
|
mp := getg().m
|
|
|
|
|
mnext := lockextra(true)
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.schedlink.set(mnext)
|
2015-01-06 13:56:21 -05:00
|
|
|
|
|
|
|
|
setg(nil)
|
2014-11-11 17:08:33 -05:00
|
|
|
unlockextra(mp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var extram uintptr
|
|
|
|
|
|
|
|
|
|
// lockextra locks the extra list and returns the list head.
|
|
|
|
|
// The caller must unlock the list by storing a new list head
|
|
|
|
|
// to extram. If nilokay is true, then lockextra will
|
|
|
|
|
// return a nil list head if that's what it finds. If nilokay is false,
|
|
|
|
|
// lockextra will keep waiting until the list head is no longer nil.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func lockextra(nilokay bool) *m {
|
|
|
|
|
const locked = 1
|
|
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
old := atomicloaduintptr(&extram)
|
|
|
|
|
if old == locked {
|
|
|
|
|
yield := osyield
|
|
|
|
|
yield()
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if old == 0 && !nilokay {
|
|
|
|
|
usleep(1)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if casuintptr(&extram, old, locked) {
|
|
|
|
|
return (*m)(unsafe.Pointer(old))
|
|
|
|
|
}
|
|
|
|
|
yield := osyield
|
|
|
|
|
yield()
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func unlockextra(mp *m) {
|
|
|
|
|
atomicstoreuintptr(&extram, uintptr(unsafe.Pointer(mp)))
|
|
|
|
|
}
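lockextra and unlockextra implement the strategy sketched in the needm comment: the extra-M list is "locked" by exchanging its head for a sentinel value and "unlocked" by storing a valid head back. A self-contained sketch of that exchange-as-lock pattern over an atomic word (the node ids and names here are illustrative, not the runtime's extram):

package main

import (
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
)

// The list head is modelled as an atomic word: 0 is an empty list,
// 1 is the "locked" sentinel, and anything else names a node.
const locked = int64(1)

var head int64 = 2 // seeded with one node, like the initial extra M

// lockList spins until it can swap the head for the sentinel, then
// returns the head it stole (the lockextra pattern).
func lockList() int64 {
	for {
		old := atomic.LoadInt64(&head)
		if old == locked {
			runtime.Gosched() // someone else holds the "lock"; yield
			continue
		}
		if atomic.CompareAndSwapInt64(&head, old, locked) {
			return old
		}
	}
}

// unlockList publishes a new head, which releases the "lock".
func unlockList(n int64) { atomic.StoreInt64(&head, n) }

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(id int64) {
			defer wg.Done()
			old := lockList()    // steal the head; list is now locked
			unlockList(old + id) // store a valid head again to unlock
		}(int64(i + 10))
	}
	wg.Wait()
	fmt.Println("final head:", atomic.LoadInt64(&head))
}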
|
|
|
|
|
|
|
|
|
|
// Create a new m. It will start off with a call to fn, or else the scheduler.
|
runtime: Remove write barriers during STW.
The GC assumes that there will be no asynchronous write barriers when
the world is stopped. This keeps the synchronization between write
barriers and the GC simple. However, currently, there are a few places
in runtime code where this assumption does not hold.
The GC stops the world by collecting all Ps, which stops all user Go
code, but small parts of the runtime can run without a P. For example,
the code that releases a P must still deschedule its G onto a runnable
queue before stopping. Similarly, when a G returns from a long-running
syscall, it must run code to reacquire a P.
Currently, this code can contain write barriers. This can lead to the
GC collecting reachable objects if something like the following
sequence of events happens:
1. GC stops the world by collecting all Ps.
2. G #1 returns from a syscall (for example), tries to install a
pointer to object X, and calls greyobject on X.
3. greyobject on G #1 marks X, but does not yet add it to a write
buffer. At this point, X is effectively black, not grey, even though
it may point to white objects.
4. GC reaches X through some other path and calls greyobject on X, but
greyobject does nothing because X is already marked.
5. GC completes.
6. greyobject on G #1 adds X to a work buffer, but it's too late.
7. Objects that were reachable only through X are incorrectly collected.
To fix this, we check the invariant that no asynchronous write
barriers happen when the world is stopped by checking that write
barriers always have a P, and modify all currently known sources of
these writes to disable the write barrier. In all modified cases this
is safe because the object in question will always be reachable via
some other path.
Some of the trace code was turned off, in particular the
code that traces returning from a syscall. The GC assumes
that as far as the heap is concerned the thread is stopped
when it is in a syscall. Upon returning the trace code
must not do any heap writes for the same reasons discussed
above.
Fixes #10098
Fixes #9953
Fixes #9951
Fixes #9884
May relate to #9610 #9771
Change-Id: Ic2e70b7caffa053e56156838eb8d89503e3c0c8a
Reviewed-on: https://go-review.googlesource.com/7504
Reviewed-by: Austin Clements <austin@google.com>
2015-03-12 14:19:21 -04:00
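The invariant described in the commit message above is that a write barrier may only execute while the writer holds a P, which is why functions that can run without one, such as newm below, are marked //go:nowritebarrier. A toy assertion of that rule (worker and writeBarrier are illustrative stand-ins, not runtime APIs):

package main

import "fmt"

type worker struct{ p *struct{} } // nil models "running without a P"

// writeBarrier models the check: an asynchronous write barrier is only
// legal while the writer holds a P, so a nil P trips the invariant.
func (w *worker) writeBarrier(slot **int, val *int) {
	if w.p == nil {
		panic("write barrier with no P (world may be stopped)")
	}
	// ... a real barrier would shade val for the garbage collector ...
	*slot = val
}

func main() {
	x := 42
	var slot *int

	withP := worker{p: &struct{}{}}
	withP.writeBarrier(&slot, &x)
	fmt.Println("barrier with a P ok, slot =", *slot)

	defer func() { fmt.Println("recovered:", recover()) }()
	noP := worker{}
	noP.writeBarrier(&slot, &x) // trips the invariant
}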
|
|
|
// fn needs to be static and not a heap allocated closure.
|
2015-03-29 10:20:54 -04:00
|
|
|
// May run with m.p==nil, so write barriers are not allowed.
|
runtime: Remove write barriers during STW.
2015-03-12 14:19:21 -04:00
|
|
|
//go:nowritebarrier
|
2015-02-12 10:18:31 +03:00
|
|
|
func newm(fn func(), _p_ *p) {
|
2015-04-17 00:21:30 -04:00
|
|
|
mp := allocm(_p_, fn)
|
|
|
|
|
mp.nextp.set(_p_)
|
runtime: don't always unblock all signals
2015-05-18 11:00:24 +02:00
|
|
|
msigsave(mp)
|
2014-11-11 17:08:33 -05:00
|
|
|
if iscgo {
|
|
|
|
|
var ts cgothreadstart
|
|
|
|
|
if _cgo_thread_start == nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("_cgo_thread_start missing")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
ts.g.set(mp.g0)
|
2014-11-11 17:08:33 -05:00
|
|
|
ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
|
|
|
|
|
ts.fn = unsafe.Pointer(funcPC(mstart))
|
2015-04-27 17:32:23 +10:00
|
|
|
asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
|
2014-11-11 17:08:33 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
newosproc(mp, unsafe.Pointer(mp.g0.stack.hi))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Stops execution of the current m until new work is available.
|
|
|
|
|
// Returns with acquired P.
|
|
|
|
|
func stopm() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
if _g_.m.locks != 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("stopm holding locks")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("stopm holding p")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if _g_.m.spinning {
|
|
|
|
|
_g_.m.spinning = false
|
|
|
|
|
xadd(&sched.nmspinning, -1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
retry:
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
mput(_g_.m)
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
notesleep(&_g_.m.park)
|
|
|
|
|
noteclear(&_g_.m.park)
|
|
|
|
|
if _g_.m.helpgc != 0 {
|
|
|
|
|
gchelper()
|
|
|
|
|
_g_.m.helpgc = 0
|
|
|
|
|
_g_.m.mcache = nil
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
goto retry
|
|
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
acquirep(_g_.m.nextp.ptr())
|
|
|
|
|
_g_.m.nextp = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
}
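stopm parks on m.park with notesleep, is woken by a notewakeup from startm (or startlockedm), and re-arms the note with noteclear before continuing. A small channel-backed note that shows only that sleep/wakeup/clear cycle (not the runtime's futex- or semaphore-based note):

package main

import (
	"fmt"
	"time"
)

// note is a one-shot wakeup flag: sleep blocks until wakeup, and
// clear re-arms it so it can be slept on again.
type note struct{ ch chan struct{} }

func newNote() *note    { return &note{ch: make(chan struct{})} }
func (n *note) wakeup() { close(n.ch) }
func (n *note) sleep()  { <-n.ch }
func (n *note) clear()  { n.ch = make(chan struct{}) }

func main() {
	park := newNote()

	go func() { // plays the role of startm: hand off work, then wake the parked M
		time.Sleep(10 * time.Millisecond)
		fmt.Println("startm: nextp handed off, waking parked M")
		park.wakeup()
	}()

	fmt.Println("stopm: parking")
	park.sleep() // blocks until the wakeup above
	park.clear() // re-arm for the next stopm
	fmt.Println("stopm: woken, acquiring nextp")
}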
|
|
|
|
|
|
|
|
|
|
func mspinning() {
|
runtime: reset spinning in mspinning if work was ready()ed
This fixes a bug where the runtime ready()s a goroutine while setting
up a new M that's initially marked as spinning, causing the scheduler
to later panic when it finds work in the run queue of a P associated
with a spinning M. Specifically, the sequence of events that can lead
to this is:
1) sysmon calls handoffp to hand off a P stolen from a syscall.
2) handoffp sees no pending work on the P, so it calls startm with
spinning set.
3) startm calls newm, which in turn calls allocm to allocate a new M.
4) allocm "borrows" the P we're handing off in order to do allocation
and performs this allocation.
5) This allocation may assist the garbage collector, and this assist
may detect the end of concurrent mark and ready() the main GC
goroutine to signal this.
6) This ready()ing puts the GC goroutine on the run queue of the
borrowed P.
7) newm starts the OS thread, which runs mstart and subsequently
mstart1, which marks the M spinning because startm was called with
spinning set.
8) mstart1 enters the scheduler, which panics because there's work on
the run queue, but the M is marked spinning.
To fix this, before marking the M spinning in step 7, add a check to
see if work has been added to the P's run queue. If this is the case,
undo the spinning instead.
Fixes #10573.
Change-Id: I4670495ae00582144a55ce88c45ae71de597cfa5
Reviewed-on: https://go-review.googlesource.com/9332
Reviewed-by: Russ Cox <rsc@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
2015-04-24 22:33:13 -04:00
|
|
|
gp := getg()
|
|
|
|
|
if !runqempty(gp.m.nextp.ptr()) {
|
|
|
|
|
// Something (presumably the GC) was readied while the
|
|
|
|
|
// runtime was starting up this M, so the M is no
|
|
|
|
|
// longer spinning.
|
|
|
|
|
if int32(xadd(&sched.nmspinning, -1)) < 0 {
|
|
|
|
|
throw("mspinning: nmspinning underflowed")
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
gp.m.spinning = true
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Schedules some M to run the p (creates an M if necessary).
|
|
|
|
|
// If p==nil, tries to get an idle P, if no idle P's does nothing.
|
2015-03-29 10:20:54 -04:00
|
|
|
// May run with m.p==nil, so write barriers are not allowed.
|
runtime: Remove write barriers during STW.
2015-03-12 14:19:21 -04:00
|
|
|
//go:nowritebarrier
|
2014-11-11 17:08:33 -05:00
|
|
|
func startm(_p_ *p, spinning bool) {
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
if _p_ == nil {
|
|
|
|
|
_p_ = pidleget()
|
|
|
|
|
if _p_ == nil {
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if spinning {
|
|
|
|
|
xadd(&sched.nmspinning, -1)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
mp := mget()
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if mp == nil {
|
|
|
|
|
var fn func()
|
|
|
|
|
if spinning {
|
|
|
|
|
fn = mspinning
|
|
|
|
|
}
|
2015-02-12 10:18:31 +03:00
|
|
|
newm(fn, _p_)
|
2014-11-11 17:08:33 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if mp.spinning {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("startm: m is spinning")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
if mp.nextp != 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("startm: m has p")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
runtime: reset spinning in mspinning if work was ready()ed
2015-04-24 22:33:13 -04:00
|
|
|
if spinning && !runqempty(_p_) {
|
|
|
|
|
throw("startm: p has runnable gs")
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
mp.spinning = spinning
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.nextp.set(_p_)
|
2014-11-11 17:08:33 -05:00
|
|
|
notewakeup(&mp.park)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Hands off P from syscall or locked M.
|
runtime: disallow write barriers in handoffp and callees
handoffp by definition runs without a P, so it's not allowed to have
write barriers. It doesn't have any right now, but mark it
nowritebarrier to keep any from creeping in later. handoffp in
turn calls startm, newm, and newosproc, all of which are "below Go"
and make sense to run without a P, so disallow write barriers in these
as well.
For most functions, we've done this because they may race with
stoptheworld() and hence must not have write barriers. For these
functions, it's a little different: the world can't stop while we're
in handoffp, so this race isn't present. But we implement this
restriction with a somewhat broader rule that you can't have a write
barrier without a P. We like this rule because it's simple and means
that our write barriers can depend on there being a P, even though
this rule is actually a little broader than necessary. Hence, even
though there's no danger of the race in these functions, we want to
adhere to the broader rule.
Change-Id: Ie22319c30eea37d703eb52f5c7ca5da872030b88
Reviewed-on: https://go-review.googlesource.com/8130
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Minux Ma <minux@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-26 15:50:22 -04:00
|
|
|
// Always runs without a P, so write barriers are not allowed.
|
|
|
|
|
//go:nowritebarrier
|
2014-11-11 17:08:33 -05:00
|
|
|
func handoffp(_p_ *p) {
|
|
|
|
|
// if it has local work, start it straight away
|
2015-04-22 12:18:01 -04:00
|
|
|
if !runqempty(_p_) || sched.runqsize != 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
startm(_p_, false)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
// no local work, check that there are no spinning/idle M's,
|
|
|
|
|
// otherwise our help is not required
|
|
|
|
|
if atomicload(&sched.nmspinning)+atomicload(&sched.npidle) == 0 && cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
|
|
|
|
|
startm(_p_, true)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
if sched.gcwaiting != 0 {
|
|
|
|
|
_p_.status = _Pgcstop
|
|
|
|
|
sched.stopwait--
|
|
|
|
|
if sched.stopwait == 0 {
|
|
|
|
|
notewakeup(&sched.stopnote)
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-03-27 16:49:12 -04:00
|
|
|
if _p_.runSafePointFn != 0 && cas(&_p_.runSafePointFn, 1, 0) {
|
|
|
|
|
sched.safePointFn(_p_)
|
runtime: use separate count and note for forEachP
2015-05-15 16:31:17 -04:00
|
|
|
sched.safePointWait--
|
|
|
|
|
if sched.safePointWait == 0 {
|
|
|
|
|
notewakeup(&sched.safePointNote)
|
2015-03-27 16:49:12 -04:00
|
|
|
}
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
if sched.runqsize != 0 {
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
startm(_p_, false)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
// If this is the last running P and nobody is polling network,
|
|
|
|
|
// need to wakeup another M to poll network.
|
|
|
|
|
if sched.npidle == uint32(gomaxprocs-1) && atomicload64(&sched.lastpoll) != 0 {
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
startm(_p_, false)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
pidleput(_p_)
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Tries to add one more P to execute G's.
|
|
|
|
|
// Called when a G is made runnable (newproc, ready).
|
|
|
|
|
func wakep() {
|
|
|
|
|
// be conservative about spinning threads
|
|
|
|
|
if !cas(&sched.nmspinning, 0, 1) {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
startm(nil, true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Stops execution of the current m that is locked to a g until the g is runnable again.
|
|
|
|
|
// Returns with acquired P.
|
|
|
|
|
func stoplockedm() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("stoplockedm: inconsistent locking")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
// Schedule another M to run this p.
|
|
|
|
|
_p_ := releasep()
|
|
|
|
|
handoffp(_p_)
|
|
|
|
|
}
|
|
|
|
|
incidlelocked(1)
|
|
|
|
|
// Wait until another thread schedules lockedg again.
|
|
|
|
|
notesleep(&_g_.m.park)
|
|
|
|
|
noteclear(&_g_.m.park)
|
|
|
|
|
status := readgstatus(_g_.m.lockedg)
|
|
|
|
|
if status&^_Gscan != _Grunnable {
|
|
|
|
|
print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
|
|
|
|
|
dumpgstatus(_g_)
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("stoplockedm: not runnable")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
acquirep(_g_.m.nextp.ptr())
|
|
|
|
|
_g_.m.nextp = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Schedules the locked m to run the locked gp.
|
runtime: Remove write barriers during STW.
2015-03-12 14:19:21 -04:00
|
|
|
// May run during STW, so write barriers are not allowed.
|
|
|
|
|
//go:nowritebarrier
|
2014-11-11 17:08:33 -05:00
|
|
|
func startlockedm(gp *g) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
mp := gp.lockedm
|
|
|
|
|
if mp == _g_.m {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("startlockedm: locked to me")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
if mp.nextp != 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("startlockedm: m has p")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
// directly handoff current P to the locked m
|
|
|
|
|
incidlelocked(-1)
|
|
|
|
|
_p_ := releasep()
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.nextp.set(_p_)
|
2014-11-11 17:08:33 -05:00
|
|
|
notewakeup(&mp.park)
|
|
|
|
|
stopm()
|
|
|
|
|
}
|
|
|
|
|
|
2015-05-15 16:00:50 -04:00
|
|
|
// Stops the current m for stopTheWorld.
|
2014-11-11 17:08:33 -05:00
|
|
|
// Returns when the world is restarted.
|
|
|
|
|
func gcstopm() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
if sched.gcwaiting == 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("gcstopm: not waiting for gc")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if _g_.m.spinning {
|
|
|
|
|
_g_.m.spinning = false
|
|
|
|
|
xadd(&sched.nmspinning, -1)
|
|
|
|
|
}
|
|
|
|
|
_p_ := releasep()
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
_p_.status = _Pgcstop
|
|
|
|
|
sched.stopwait--
|
|
|
|
|
if sched.stopwait == 0 {
|
|
|
|
|
notewakeup(&sched.stopnote)
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
stopm()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Schedules gp to run on the current M.
|
runtime: yield time slice to most recently readied G
Currently, when the runtime ready()s a G, it adds it to the end of the
current P's run queue and continues running. If there are many other
things in the run queue, this can result in a significant delay before
the ready()d G actually runs and can hurt fairness when other Gs in
the run queue are CPU hogs. For example, if there are three Gs sharing
a P, one of which is a CPU hog that never voluntarily gives up the P
and the other two of which are doing small amounts of work and
communicating back and forth on an unbuffered channel, the two
communicating Gs will get very little CPU time.
Change this so that when G1 ready()s G2 and then blocks, the scheduler
immediately hands off the remainder of G1's time slice to G2. In the
above example, the two communicating Gs will now act as a unit and
together get half of the CPU time, while the CPU hog gets the other
half of the CPU time.
This fixes the problem demonstrated by the ping-pong benchmark added
in the previous commit:
benchmark old ns/op new ns/op delta
BenchmarkPingPongHog 684287 825 -99.88%
On the x/benchmarks suite, this change improves the performance of
garbage by ~6% (for GOMAXPROCS=1 and 4), and json by 28% and 36% for
GOMAXPROCS=1 and 4. It has negligible effect on heap size.
This has no effect on the go1 benchmark suite since those benchmarks
are mostly single-threaded.
Change-Id: I858a08eaa78f702ea98a5fac99d28a4ac91d339f
Reviewed-on: https://go-review.googlesource.com/9289
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-22 14:42:26 -04:00
|
|
|
// If inheritTime is true, gp inherits the remaining time in the
|
|
|
|
|
// current time slice. Otherwise, it starts a new time slice.
|
2014-11-11 17:08:33 -05:00
|
|
|
// Never returns.
|
runtime: yield time slice to most recently readied G
2015-04-22 14:42:26 -04:00
|
|
|
func execute(gp *g, inheritTime bool) {
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
casgstatus(gp, _Grunnable, _Grunning)
|
|
|
|
|
gp.waitsince = 0
|
|
|
|
|
gp.preempt = false
|
2015-01-05 16:29:21 +00:00
|
|
|
gp.stackguard0 = gp.stack.lo + _StackGuard
|
runtime: yield time slice to most recently readied G
2015-04-22 14:42:26 -04:00
|
|
|
if !inheritTime {
|
|
|
|
|
_g_.m.p.ptr().schedtick++
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.m.curg = gp
|
|
|
|
|
gp.m = _g_.m
|
|
|
|
|
|
|
|
|
|
// Check whether the profiler needs to be turned on or off.
|
|
|
|
|
hz := sched.profilehz
|
|
|
|
|
if _g_.m.profilehz != hz {
|
|
|
|
|
resetcpuprofiler(hz)
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-04-20 15:57:52 +03:00
|
|
|
// GoSysExit has to happen when we have a P, but before GoStart.
|
|
|
|
|
// So we emit it here.
|
|
|
|
|
if gp.syscallsp != 0 && gp.sysblocktraced {
|
2015-07-23 14:01:03 -04:00
|
|
|
traceGoSysExit(gp.sysexitseq, gp.sysexitticks)
|
2015-04-20 15:57:52 +03:00
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
traceGoStart()
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
gogo(&gp.sched)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Finds a runnable goroutine to execute.
|
|
|
|
|
// Tries to steal from other P's, get g from global queue, poll network.
|
2015-04-22 14:42:26 -04:00
|
|
|
func findrunnable() (gp *g, inheritTime bool) {
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
top:
|
|
|
|
|
if sched.gcwaiting != 0 {
|
|
|
|
|
gcstopm()
|
|
|
|
|
goto top
|
|
|
|
|
}
|
2015-03-27 16:49:12 -04:00
|
|
|
if _g_.m.p.ptr().runSafePointFn != 0 {
|
|
|
|
|
runSafePointFn()
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
if fingwait && fingwake {
|
|
|
|
|
if gp := wakefing(); gp != nil {
|
2015-02-21 21:01:40 +03:00
|
|
|
ready(gp, 0)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// local runq
|
2015-04-22 14:42:26 -04:00
|
|
|
if gp, inheritTime := runqget(_g_.m.p.ptr()); gp != nil {
|
|
|
|
|
return gp, inheritTime
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// global runq
|
|
|
|
|
if sched.runqsize != 0 {
|
|
|
|
|
lock(&sched.lock)
|
2015-04-17 00:21:30 -04:00
|
|
|
gp := globrunqget(_g_.m.p.ptr(), 0)
|
2014-11-11 17:08:33 -05:00
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if gp != nil {
|
2015-04-22 14:42:26 -04:00
|
|
|
return gp, false
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-11 19:11:32 +03:00
|
|
|
// Poll network.
|
|
|
|
|
// This netpoll is only an optimization before we resort to stealing.
|
|
|
|
|
// We can safely skip it if there is a thread blocked in netpoll already.
|
|
|
|
|
// If there is any kind of logical race with that blocked thread
|
|
|
|
|
// (e.g. it has already returned from netpoll, but does not set lastpoll yet),
|
|
|
|
|
// this thread will do blocking netpoll below anyway.
|
|
|
|
|
if netpollinited() && sched.lastpoll != 0 {
|
|
|
|
|
if gp := netpoll(false); gp != nil { // non-blocking
|
|
|
|
|
// netpoll returns a list of goroutines linked by schedlink.
|
2015-04-17 00:21:30 -04:00
|
|
|
injectglist(gp.schedlink.ptr())
|
2015-02-11 19:11:32 +03:00
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
|
|
|
|
if trace.enabled {
|
2015-02-21 21:01:40 +03:00
|
|
|
traceGoUnpark(gp, 0)
|
2015-02-11 19:11:32 +03:00
|
|
|
}
|
2015-04-22 14:42:26 -04:00
|
|
|
return gp, false
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If number of spinning M's >= number of busy P's, block.
|
|
|
|
|
// This is necessary to prevent excessive CPU consumption
|
|
|
|
|
// when GOMAXPROCS>>1 but the program parallelism is low.
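// For example (illustrative numbers, not taken from a real trace): with
// GOMAXPROCS=8, 3 idle Ps and 3 Ms already spinning, 2*3 >= 8-3 holds,
// so this M blocks instead of starting yet another spinning search.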
|
|
|
|
|
if !_g_.m.spinning && 2*atomicload(&sched.nmspinning) >= uint32(gomaxprocs)-atomicload(&sched.npidle) { // TODO: fast atomic
|
|
|
|
|
goto stop
|
|
|
|
|
}
|
|
|
|
|
if !_g_.m.spinning {
|
|
|
|
|
_g_.m.spinning = true
|
|
|
|
|
xadd(&sched.nmspinning, 1)
|
|
|
|
|
}
|
|
|
|
|
// random steal from other P's
|
runtime: reduce thrashing of gs between ps
One important use case is a pipeline computation that passes values
from one Goroutine to the next and then exits or is placed in a
wait state. If GOMAXPROCS > 1 a Goroutine running on P1 will enable
another Goroutine and then immediately make P1 available to execute
it. We need to prevent other Ps from stealing the G that P1 is about
to execute. Otherwise the Gs can thrash between Ps causing unneeded
synchronization and slowing down throughput.
Fix this by changing the stealing logic so that when a P attempts to
steal the only G on some other P's run queue, it will pause
momentarily to allow the victim P to schedule the G.
As part of optimizing stealing we also use a per-P victim queue to
move stolen gs. This eliminates the zeroing of a stack-local victim
queue which turned out to be expensive.
This CL is a necessary but not sufficient prerequisite to changing
the default value of GOMAXPROCS to something > 1 which is another
CL/discussion.
For highly serialized programs, such as GoroutineRing below this can
make a large difference. For larger and more parallel programs such
as the x/benchmarks there is no noticeable detriment.
~/work/code/src/rsc.io/benchstat/benchstat old.txt new.txt
name old mean new mean delta
GoroutineRing 30.2µs × (0.98,1.01) 30.1µs × (0.97,1.04) ~ (p=0.941)
GoroutineRing-2 113µs × (0.91,1.07) 30µs × (0.98,1.03) -73.17% (p=0.004)
GoroutineRing-4 144µs × (0.98,1.02) 32µs × (0.98,1.01) -77.69% (p=0.000)
GoroutineRingBuf 32.7µs × (0.97,1.03) 32.5µs × (0.97,1.02) ~ (p=0.795)
GoroutineRingBuf-2 120µs × (0.92,1.08) 33µs × (1.00,1.00) -72.48% (p=0.004)
GoroutineRingBuf-4 138µs × (0.92,1.06) 33µs × (1.00,1.00) -76.21% (p=0.003)
The bench benchmarks show little impact.
old new
garbage 7032879 7011696
httpold 25509 25301
splayold 1022073 1019499
jsonold 28230624 28081433
Change-Id: I228c48fed8d85c9bbef16a7edc53ab7898506f50
Reviewed-on: https://go-review.googlesource.com/9872
Reviewed-by: Austin Clements <austin@google.com>
2015-05-07 17:19:30 -04:00
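As a rough illustration of the hand-off pattern this commit targets (a sketch in the spirit of the GoroutineRing benchmark above, not its actual code), the following standalone program passes a countdown token around a ring of goroutines; each stage readies its successor and then immediately blocks again:

package main

import (
	"fmt"
	"time"
)

// ringTrip passes a countdown token around a ring of n goroutines until it
// reaches zero and reports how long that took. Each stage readies its
// successor and then blocks on its own channel again.
func ringTrip(n, hops int) time.Duration {
	chans := make([]chan int, n)
	for i := range chans {
		chans[i] = make(chan int)
	}
	done := make(chan struct{})
	for i := 0; i < n; i++ {
		in, out := chans[i], chans[(i+1)%n]
		go func() {
			for v := range in {
				if v == 0 {
					close(done)
					return
				}
				out <- v - 1
			}
		}()
	}
	start := time.Now()
	chans[0] <- hops
	<-done
	return time.Since(start)
}

func main() {
	const n, hops = 8, 100000
	d := ringTrip(n, hops)
	fmt.Printf("%d hops across %d goroutines: %v (%v per hop)\n", hops, n, d, d/hops)
}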
|
|
|
for i := 0; i < int(4*gomaxprocs); i++ {
|
2014-11-11 17:08:33 -05:00
|
|
|
if sched.gcwaiting != 0 {
|
|
|
|
|
goto top
|
|
|
|
|
}
|
|
|
|
|
_p_ := allp[fastrand1()%uint32(gomaxprocs)]
|
|
|
|
|
var gp *g
|
2015-04-17 00:21:30 -04:00
|
|
|
if _p_ == _g_.m.p.ptr() {
|
2015-04-22 14:42:26 -04:00
|
|
|
gp, _ = runqget(_p_)
|
2014-11-11 17:08:33 -05:00
|
|
|
} else {
|
2015-05-07 17:19:30 -04:00
|
|
|
stealRunNextG := i > 2*int(gomaxprocs) // first look for ready queues with more than 1 g
|
|
|
|
|
gp = runqsteal(_g_.m.p.ptr(), _p_, stealRunNextG)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if gp != nil {
|
2015-04-22 14:42:26 -04:00
|
|
|
return gp, false
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-05-07 17:19:30 -04:00
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
stop:
|
|
|
|
|
|
2015-05-07 17:19:30 -04:00
|
|
|
// We have nothing to do. If we're in the GC mark phase and can
|
2015-03-27 17:01:53 -04:00
|
|
|
// safely scan and blacken objects, run idle-time marking
|
|
|
|
|
// rather than give up the P.
|
2015-05-18 16:02:37 -04:00
|
|
|
if _p_ := _g_.m.p.ptr(); gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != nil && gcMarkWorkAvailable(_p_) {
|
2015-04-15 17:01:30 -04:00
|
|
|
_p_.gcMarkWorkerMode = gcMarkWorkerIdleMode
|
runtime: multi-threaded, utilization-scheduled background mark
Currently, the concurrent mark phase is performed by the main GC
goroutine. Prior to the previous commit enabling preemption, this
caused marking to always consume 1/GOMAXPROCS of the available CPU
time. If GOMAXPROCS=1, this meant background GC would consume 100% of
the CPU (effectively a STW). If GOMAXPROCS>4, background GC would use
less than the goal of 25%. If GOMAXPROCS=4, background GC would use
the goal 25%, but if the mutator wasn't using the remaining 75%,
background marking wouldn't take advantage of the idle time. Enabling
preemption in the previous commit made GC miss CPU targets in
completely different ways, but set us up to bring everything back in
line.
This change replaces the fixed GC goroutine with per-P background mark
goroutines. Once started, these goroutines don't go in the standard
run queues; instead, they are scheduled specially such that the time
spent in mutator assists and the background mark goroutines totals 25%
of the CPU time available to the program. Furthermore, this lets
background marking take advantage of idle Ps, which significantly
boosts GC performance for applications that under-utilize the CPU.
This requires also changing how time is reported for gctrace, so this
change splits the concurrent mark CPU time into assist/background/idle
scanning.
This also requires increasing the size of the StackRecord slice used
in a GoroutineProfile test.
Change-Id: I0936ff907d2cee6cb687a208f2df47e8988e3157
Reviewed-on: https://go-review.googlesource.com/8850
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-23 21:07:33 -04:00
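As a back-of-the-envelope illustration of the 25% utilization target described above (hypothetical helper name; the real policy lives in the GC controller and differs in detail), the budget splits into whole dedicated mark workers plus a fractional remainder:

package main

import "fmt"

// markWorkerBudget is an illustrative (hypothetical) helper, not the real
// GC controller logic: it splits a 25% CPU utilization goal into whole
// "dedicated" mark workers plus a fractional remainder.
func markWorkerBudget(gomaxprocs int) (dedicated int, fractional float64) {
	const utilizationGoal = 0.25
	total := utilizationGoal * float64(gomaxprocs) // Ps' worth of CPU for marking
	dedicated = int(total)                         // Ps that run only mark work
	fractional = total - float64(dedicated)        // part-time share on one more P
	return
}

func main() {
	for _, n := range []int{1, 2, 4, 8} {
		d, f := markWorkerBudget(n)
		fmt.Printf("GOMAXPROCS=%d: %d dedicated mark worker(s) + %.2f fractional\n", n, d, f)
	}
}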
|
|
|
gp := _p_.gcBgMarkWorker
|
|
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
|
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoUnpark(gp, 0)
|
|
|
|
|
}
|
2015-04-22 14:42:26 -04:00
|
|
|
return gp, false
|
2015-03-23 21:07:33 -04:00
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// return P and block
|
|
|
|
|
lock(&sched.lock)
|
2015-03-27 16:49:12 -04:00
|
|
|
if sched.gcwaiting != 0 || _g_.m.p.ptr().runSafePointFn != 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
unlock(&sched.lock)
|
|
|
|
|
goto top
|
|
|
|
|
}
|
|
|
|
|
if sched.runqsize != 0 {
|
2015-04-17 00:21:30 -04:00
|
|
|
gp := globrunqget(_g_.m.p.ptr(), 0)
|
2014-11-11 17:08:33 -05:00
|
|
|
unlock(&sched.lock)
|
2015-04-22 14:42:26 -04:00
|
|
|
return gp, false
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
_p_ := releasep()
|
|
|
|
|
pidleput(_p_)
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if _g_.m.spinning {
|
|
|
|
|
_g_.m.spinning = false
|
|
|
|
|
xadd(&sched.nmspinning, -1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// check all runqueues once again
|
|
|
|
|
for i := 0; i < int(gomaxprocs); i++ {
|
|
|
|
|
_p_ := allp[i]
|
2015-04-22 12:18:01 -04:00
|
|
|
if _p_ != nil && !runqempty(_p_) {
|
2014-11-11 17:08:33 -05:00
|
|
|
lock(&sched.lock)
|
|
|
|
|
_p_ = pidleget()
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if _p_ != nil {
|
|
|
|
|
acquirep(_p_)
|
|
|
|
|
goto top
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// poll network
|
2015-01-13 20:12:50 +03:00
|
|
|
if netpollinited() && xchg64(&sched.lastpoll, 0) != 0 {
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("findrunnable: netpoll with p")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if _g_.m.spinning {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("findrunnable: netpoll with spinning")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
gp := netpoll(true) // block until new work is available
|
|
|
|
|
atomicstore64(&sched.lastpoll, uint64(nanotime()))
|
|
|
|
|
if gp != nil {
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
_p_ = pidleget()
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if _p_ != nil {
|
|
|
|
|
acquirep(_p_)
|
2015-04-17 00:21:30 -04:00
|
|
|
injectglist(gp.schedlink.ptr())
|
2014-11-11 17:08:33 -05:00
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-02-21 21:01:40 +03:00
|
|
|
traceGoUnpark(gp, 0)
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2015-04-22 14:42:26 -04:00
|
|
|
return gp, false
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
injectglist(gp)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
stopm()
|
|
|
|
|
goto top
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func resetspinning() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
var nmspinning uint32
|
|
|
|
|
if _g_.m.spinning {
|
|
|
|
|
_g_.m.spinning = false
|
|
|
|
|
nmspinning = xadd(&sched.nmspinning, -1)
|
|
|
|
|
if int32(nmspinning) < 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("findrunnable: negative nmspinning")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
nmspinning = atomicload(&sched.nmspinning)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
|
|
|
|
|
// so see if we need to wakeup another P here.
|
|
|
|
|
if nmspinning == 0 && atomicload(&sched.npidle) > 0 {
|
|
|
|
|
wakep()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Injects the list of runnable G's into the scheduler.
|
|
|
|
|
// Can run concurrently with GC.
|
|
|
|
|
func injectglist(glist *g) {
|
|
|
|
|
if glist == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-04-17 00:21:30 -04:00
|
|
|
for gp := glist; gp != nil; gp = gp.schedlink.ptr() {
|
2015-02-21 21:01:40 +03:00
|
|
|
traceGoUnpark(gp, 0)
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
lock(&sched.lock)
|
|
|
|
|
var n int
|
|
|
|
|
for n = 0; glist != nil; n++ {
|
|
|
|
|
gp := glist
|
2015-04-17 00:21:30 -04:00
|
|
|
glist = gp.schedlink.ptr()
|
2014-11-11 17:08:33 -05:00
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
|
|
|
|
globrunqput(gp)
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
for ; n != 0 && sched.npidle != 0; n-- {
|
|
|
|
|
startm(nil, false)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// One round of scheduler: find a runnable goroutine and execute it.
|
|
|
|
|
// Never returns.
|
|
|
|
|
func schedule() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
if _g_.m.locks != 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("schedule: holding locks")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if _g_.m.lockedg != nil {
|
|
|
|
|
stoplockedm()
|
2015-04-22 14:42:26 -04:00
|
|
|
execute(_g_.m.lockedg, false) // Never returns.
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
top:
|
|
|
|
|
if sched.gcwaiting != 0 {
|
|
|
|
|
gcstopm()
|
|
|
|
|
goto top
|
|
|
|
|
}
|
2015-03-27 16:49:12 -04:00
|
|
|
if _g_.m.p.ptr().runSafePointFn != 0 {
|
|
|
|
|
runSafePointFn()
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
var gp *g
|
2015-04-22 14:42:26 -04:00
|
|
|
var inheritTime bool
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled || trace.shutdown {
|
|
|
|
|
gp = traceReader()
|
2014-11-11 17:08:33 -05:00
|
|
|
if gp != nil {
|
2014-12-12 18:41:57 +01:00
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
2015-02-21 21:01:40 +03:00
|
|
|
traceGoUnpark(gp, 0)
|
2014-11-11 17:08:33 -05:00
|
|
|
resetspinning()
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-03-27 17:01:53 -04:00
|
|
|
if gp == nil && gcBlackenEnabled != 0 {
|
2015-04-24 14:17:42 -04:00
|
|
|
gp = gcController.findRunnableGCWorker(_g_.m.p.ptr())
|
2015-03-23 21:07:33 -04:00
|
|
|
if gp != nil {
|
|
|
|
|
resetspinning()
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if gp == nil {
|
|
|
|
|
// Check the global runnable queue once in a while to ensure fairness.
|
|
|
|
|
// Otherwise two goroutines can completely occupy the local runqueue
|
|
|
|
|
// by constantly respawning each other.
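// (61 appears to be an arbitrary small prime: frequent enough for
// fairness, rare enough to keep global-queue lock traffic low.)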
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p.ptr().schedtick%61 == 0 && sched.runqsize > 0 {
|
2014-12-12 18:41:57 +01:00
|
|
|
lock(&sched.lock)
|
2015-04-17 00:21:30 -04:00
|
|
|
gp = globrunqget(_g_.m.p.ptr(), 1)
|
2014-12-12 18:41:57 +01:00
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if gp != nil {
|
|
|
|
|
resetspinning()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
if gp == nil {
|
2015-04-22 14:42:26 -04:00
|
|
|
gp, inheritTime = runqget(_g_.m.p.ptr())
|
2014-11-11 17:08:33 -05:00
|
|
|
if gp != nil && _g_.m.spinning {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("schedule: spinning with local work")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if gp == nil {
|
2015-04-22 14:42:26 -04:00
|
|
|
gp, inheritTime = findrunnable() // blocks until work is available
|
2014-11-11 17:08:33 -05:00
|
|
|
resetspinning()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if gp.lockedm != nil {
|
|
|
|
|
// Hands off own p to the locked m,
|
|
|
|
|
// then blocks waiting for a new p.
|
|
|
|
|
startlockedm(gp)
|
|
|
|
|
goto top
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-22 14:42:26 -04:00
|
|
|
execute(gp, inheritTime)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dropg removes the association between m and the current goroutine m->curg (gp for short).
|
|
|
|
|
// Typically a caller sets gp's status away from Grunning and then
|
|
|
|
|
// immediately calls dropg to finish the job. The caller is also responsible
|
|
|
|
|
// for arranging that gp will be restarted using ready at an
|
|
|
|
|
// appropriate time. After calling dropg and arranging for gp to be
|
|
|
|
|
// readied later, the caller can do other work but eventually should
|
|
|
|
|
// call schedule to restart the scheduling of goroutines on this m.
|
|
|
|
|
func dropg() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
if _g_.m.lockedg == nil {
|
|
|
|
|
_g_.m.curg.m = nil
|
|
|
|
|
_g_.m.curg = nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
|
|
|
|
|
unlock((*mutex)(lock))
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// park continuation on g0.
|
|
|
|
|
func park_m(gp *g) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-02-21 21:01:40 +03:00
|
|
|
traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip, gp)
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
casgstatus(gp, _Grunning, _Gwaiting)
|
|
|
|
|
dropg()
|
|
|
|
|
|
|
|
|
|
if _g_.m.waitunlockf != nil {
|
|
|
|
|
fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
|
|
|
|
|
ok := fn(gp, _g_.m.waitlock)
|
|
|
|
|
_g_.m.waitunlockf = nil
|
|
|
|
|
_g_.m.waitlock = nil
|
|
|
|
|
if !ok {
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-02-21 21:01:40 +03:00
|
|
|
traceGoUnpark(gp, 2)
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
2015-04-22 14:42:26 -04:00
|
|
|
execute(gp, true) // Schedule it back, never returns.
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
schedule()
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-12 18:41:57 +01:00
|
|
|
func goschedImpl(gp *g) {
|
2014-11-11 17:08:33 -05:00
|
|
|
status := readgstatus(gp)
|
|
|
|
|
if status&^_Gscan != _Grunning {
|
|
|
|
|
dumpgstatus(gp)
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("bad g status")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
casgstatus(gp, _Grunning, _Grunnable)
|
|
|
|
|
dropg()
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
globrunqput(gp)
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
|
|
|
|
|
schedule()
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-12 18:41:57 +01:00
|
|
|
// Gosched continuation on g0.
|
|
|
|
|
func gosched_m(gp *g) {
|
|
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoSched()
|
|
|
|
|
}
|
|
|
|
|
goschedImpl(gp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func gopreempt_m(gp *g) {
|
|
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoPreempt()
|
|
|
|
|
}
|
|
|
|
|
goschedImpl(gp)
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// Finishes execution of the current goroutine.
|
|
|
|
|
func goexit1() {
|
|
|
|
|
if raceenabled {
|
|
|
|
|
racegoend()
|
|
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoEnd()
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
mcall(goexit0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// goexit continuation on g0.
|
|
|
|
|
func goexit0(gp *g) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
casgstatus(gp, _Grunning, _Gdead)
|
|
|
|
|
gp.m = nil
|
|
|
|
|
gp.lockedm = nil
|
|
|
|
|
_g_.m.lockedg = nil
|
|
|
|
|
gp.paniconfault = false
|
|
|
|
|
gp._defer = nil // should be nil already but just in case.
|
|
|
|
|
gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
|
|
|
|
|
gp.writebuf = nil
|
|
|
|
|
gp.waitreason = ""
|
|
|
|
|
gp.param = nil
|
|
|
|
|
|
|
|
|
|
dropg()
|
|
|
|
|
|
|
|
|
|
if _g_.m.locked&^_LockExternal != 0 {
|
|
|
|
|
print("invalid m->locked = ", _g_.m.locked, "\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("internal lockOSThread error")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
_g_.m.locked = 0
|
2015-04-17 00:21:30 -04:00
|
|
|
gfput(_g_.m.p.ptr(), gp)
|
2014-11-11 17:08:33 -05:00
|
|
|
schedule()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
2014-12-09 10:15:18 -05:00
|
|
|
//go:nowritebarrier
|
2014-11-11 17:08:33 -05:00
|
|
|
func save(pc, sp uintptr) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
_g_.sched.pc = pc
|
|
|
|
|
_g_.sched.sp = sp
|
|
|
|
|
_g_.sched.lr = 0
|
|
|
|
|
_g_.sched.ret = 0
|
|
|
|
|
_g_.sched.ctxt = nil
|
2014-12-22 22:43:49 -05:00
|
|
|
_g_.sched.g = guintptr(unsafe.Pointer(_g_))
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The goroutine g is about to enter a system call.
|
|
|
|
|
// Record that it's not using the cpu anymore.
|
|
|
|
|
// This is called only from the go syscall library and cgocall,
|
|
|
|
|
// not from the low-level system calls used by the runtime.
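//
// Schematically, the wrappers in the syscall package bracket the raw
// system call roughly like this (simplified; the real wrappers are
// mostly assembly, and the identifiers here are illustrative):
//
//	entersyscall()
//	r1, r2, errno := rawSyscall(trap, a1, a2, a3)
//	exitsyscall()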
|
|
|
|
|
//
|
|
|
|
|
// Entersyscall cannot split the stack: the gosave must
|
|
|
|
|
// make g->sched refer to the caller's stack segment, because
|
|
|
|
|
// entersyscall is going to return immediately after.
|
|
|
|
|
//
|
|
|
|
|
// Nothing entersyscall calls can split the stack either.
|
|
|
|
|
// We cannot safely move the stack during an active call to syscall,
|
|
|
|
|
// because we do not know which of the uintptr arguments are
|
|
|
|
|
// really pointers (back into the stack).
|
|
|
|
|
// In practice, this means that we make the fast path run through
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalarg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most systems an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
|
|
|
// entersyscall doing no-split things, and the slow path has to use systemstack
|
|
|
|
|
// to run bigger things on the system stack.
|
2014-11-11 17:08:33 -05:00
|
|
|
//
|
|
|
|
|
// reentersyscall is the entry point used by cgo callbacks, where explicitly
|
|
|
|
|
// saved SP and PC are restored. This is needed when exitsyscall will be called
|
|
|
|
|
// from a function further up in the call stack than the parent, as g->syscallsp
|
|
|
|
|
// must always point to a valid stack frame. entersyscall below is the normal
|
|
|
|
|
// entry point for syscalls, which obtains the SP and PC from the caller.
|
2014-12-12 18:41:57 +01:00
|
|
|
//
|
|
|
|
|
// Syscall tracing:
|
|
|
|
|
// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
|
|
|
|
|
// If the syscall does not block, that is it, we do not emit any other events.
|
|
|
|
|
// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
|
|
|
|
|
// when syscall returns we emit traceGoSysExit and when the goroutine starts running
|
|
|
|
|
// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
|
|
|
|
|
// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
|
2015-04-17 00:21:30 -04:00
|
|
|
// we remember the current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
|
2014-12-12 18:41:57 +01:00
|
|
|
// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
|
|
|
|
|
// and we wait for the increment before emitting traceGoSysExit.
|
|
|
|
|
// Note that the increment is done even if tracing is not enabled,
|
|
|
|
|
// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
|
|
|
|
|
//
|
2014-11-11 17:08:33 -05:00
|
|
|
//go:nosplit
|
|
|
|
|
func reentersyscall(pc, sp uintptr) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
// Disable preemption because during this function g is in Gsyscall status,
|
|
|
|
|
// but can have inconsistent g->sched, do not let GC observe it.
|
|
|
|
|
_g_.m.locks++
|
|
|
|
|
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
|
|
|
|
systemstack(traceGoSysCall)
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// Entersyscall must not call any function that might split/grow the stack.
|
|
|
|
|
// (See details in comment above.)
|
|
|
|
|
// Catch calls that might, by replacing the stack guard with something that
|
|
|
|
|
// will trip any stack check and leaving a flag to tell newstack to die.
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = stackPreempt
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.throwsplit = true
|
|
|
|
|
|
|
|
|
|
// Leave SP around for GC and traceback.
|
|
|
|
|
save(pc, sp)
|
|
|
|
|
_g_.syscallsp = sp
|
|
|
|
|
_g_.syscallpc = pc
|
|
|
|
|
casgstatus(_g_, _Grunning, _Gsyscall)
|
|
|
|
|
if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
|
2014-11-15 08:00:38 -05:00
|
|
|
systemstack(func() {
|
|
|
|
|
print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("entersyscall")
|
2014-11-15 08:00:38 -05:00
|
|
|
})
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic
|
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(entersyscall_sysmon)
|
2014-11-11 17:08:33 -05:00
|
|
|
save(pc, sp)
|
|
|
|
|
}
|
|
|
|
|
|
runtime: run safe-point function before entering _Psyscall
Currently, we run a P's safe-point function immediately after entering
_Psyscall state. This is unsafe, since as soon as we put the P in
_Psyscall, we no longer control the P and another M may claim it.
We'll still run the safe-point function only once (because doing so
races on an atomic), but the P may no longer be at a safe-point when
we do so.
In particular, this means that the use of forEachP to dispose all P's
gcw caches is unsafe. A P may enter a syscall, run the safe-point
function, and dispose the P's gcw cache concurrently with another M
claiming the P and attempting to use its gcw cache. If this happens,
we may empty the gcw's workbuf after putting it on
work.{full,partial}, or add pointers to it after putting it in
work.empty. This will cause an assertion failure when we later pop the
workbuf from the list and its object count is inconsistent with the
list we got it from.
Fix this by running the safe-point function just before putting the P
in _Psyscall.
Related to #11640. This probably fixes this issue, but while I'm able
to show that we can enter a bad safe-point state as a result of this,
I can't reproduce that specific failure.
Change-Id: I6989c8ca7ef2a4a941ae1931e9a0748cbbb59434
Reviewed-on: https://go-review.googlesource.com/12124
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-07-13 18:27:24 -04:00
|
|
|
if _g_.m.p.ptr().runSafePointFn != 0 {
|
|
|
|
|
// runSafePointFn may stack split if run on this stack
|
|
|
|
|
systemstack(runSafePointFn)
|
2015-07-13 18:30:10 -04:00
|
|
|
save(pc, sp)
|
2015-07-13 18:27:24 -04:00
|
|
|
}
|
|
|
|
|
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
|
2015-04-20 15:57:52 +03:00
|
|
|
_g_.sysblocktraced = true
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.m.mcache = nil
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.ptr().m = 0
|
|
|
|
|
atomicstore(&_g_.m.p.ptr().status, _Psyscall)
|
2014-11-11 17:08:33 -05:00
|
|
|
if sched.gcwaiting != 0 {
|
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(entersyscall_gcwait)
|
2014-11-11 17:08:33 -05:00
|
|
|
save(pc, sp)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
|
|
|
|
|
// We set _StackGuard to StackPreempt so that first split stack check calls morestack.
|
|
|
|
|
// Morestack detects this case and throws.
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = stackPreempt
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.m.locks--
|
|
|
|
|
}
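// An illustrative sketch (not the actual assembly stubs) of how the syscall
// package pairs these entry points around a kernel call; rawSyscall below is
// a stand-in name for the architecture-specific SYSCALL/SVC instruction:
//
//	func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
//		entersyscall(0)               // g -> _Gsyscall, P -> _Psyscall, PC/SP saved as above
//		r1, r2, errno := rawSyscall(trap, a1, a2, a3)
//		exitsyscall(0)                // reacquire a P, or park until one is free
//		return r1, r2, Errno(errno)
//	}
//
// Between the two calls sysmon may retake the P and give it to another M,
// which is exactly the situation the _Psyscall bookkeeping above sets up.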
|
|
|
|
|
|
|
|
|
|
// Standard syscall entry used by the go syscall library and normal cgo calls.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func entersyscall(dummy int32) {
|
|
|
|
|
reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
|
|
|
|
|
}
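// The dummy argument above exists only so that &dummy names the first
// argument slot of entersyscall's frame: from that address getcallerpc
// recovers the return address of the caller and getcallersp the caller's
// stack pointer, which reentersyscall then records in g.sched and
// g.syscallsp/syscallpc for GC and traceback while the goroutine is inside
// the kernel.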
|
|
|
|
|
|
|
|
|
|
func entersyscall_sysmon() {
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
if atomicload(&sched.sysmonwait) != 0 {
|
|
|
|
|
atomicstore(&sched.sysmonwait, 0)
|
|
|
|
|
notewakeup(&sched.sysmonnote)
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
}
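// entersyscall_sysmon only runs when sysmon has parked itself on sysmonnote
// (sched.sysmonwait != 0). Waking it matters because sysmon is the thread
// that retakes Ps from long-running syscalls; if it stayed asleep, a P left
// in _Psyscall would not be handed to another M until sysmon woke up for
// some other reason.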
|
|
|
|
|
|
|
|
|
|
func entersyscall_gcwait() {
|
|
|
|
|
_g_ := getg()
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_ := _g_.m.p.ptr()
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
lock(&sched.lock)
|
2014-12-12 18:41:57 +01:00
|
|
|
if sched.stopwait > 0 && cas(&_p_.status, _Psyscall, _Pgcstop) {
|
|
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoSysBlock(_p_)
|
|
|
|
|
traceProcStop(_p_)
|
|
|
|
|
}
|
|
|
|
|
_p_.syscalltick++
|
2014-11-11 17:08:33 -05:00
|
|
|
if sched.stopwait--; sched.stopwait == 0 {
|
|
|
|
|
notewakeup(&sched.stopnote)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
}
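// entersyscall_gcwait cooperates with the stop-the-world machinery: if a stop
// is in progress (sched.stopwait > 0), the P entering the syscall is flipped
// straight from _Psyscall to _Pgcstop here, and if it was the last P the
// stopper was waiting for, stopnote is signaled so the stop can complete
// without waiting for this goroutine to leave the kernel.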
|
|
|
|
|
|
|
|
|
|
// The same as entersyscall(), but with a hint that the syscall is blocking.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func entersyscallblock(dummy int32) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
_g_.m.locks++ // see comment in entersyscall
|
|
|
|
|
_g_.throwsplit = true
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = stackPreempt // see comment in entersyscall
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
|
2015-04-20 15:57:52 +03:00
|
|
|
_g_.sysblocktraced = true
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.ptr().syscalltick++
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
// Leave SP around for GC and traceback.
|
2014-11-15 08:00:38 -05:00
|
|
|
pc := getcallerpc(unsafe.Pointer(&dummy))
|
|
|
|
|
sp := getcallersp(unsafe.Pointer(&dummy))
|
|
|
|
|
save(pc, sp)
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.syscallsp = _g_.sched.sp
|
|
|
|
|
_g_.syscallpc = _g_.sched.pc
|
2014-11-15 08:00:38 -05:00
|
|
|
if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
|
|
|
|
|
sp1 := sp
|
|
|
|
|
sp2 := _g_.sched.sp
|
|
|
|
|
sp3 := _g_.syscallsp
|
|
|
|
|
systemstack(func() {
|
|
|
|
|
print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("entersyscallblock")
|
2014-11-15 08:00:38 -05:00
|
|
|
})
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
casgstatus(_g_, _Grunning, _Gsyscall)
|
|
|
|
|
if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
|
2014-11-15 08:00:38 -05:00
|
|
|
systemstack(func() {
|
|
|
|
|
print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("entersyscallblock")
|
2014-11-15 08:00:38 -05:00
|
|
|
})
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(entersyscallblock_handoff)
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
// Resave for traceback during blocked call.
|
|
|
|
|
save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
|
|
|
|
|
|
|
|
|
|
_g_.m.locks--
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func entersyscallblock_handoff() {
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoSysCall()
|
2015-04-17 00:21:30 -04:00
|
|
|
traceGoSysBlock(getg().m.p.ptr())
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
handoffp(releasep())
|
|
|
|
|
}
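// handoffp(releasep()) detaches the P from this M immediately instead of
// waiting for sysmon to retake it: entersyscallblock is used when the call
// is known to block, so the P is passed to another M (or put on the idle
// list) right away and only this M sleeps in the kernel.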
|
|
|
|
|
|
|
|
|
|
// The goroutine g exited its system call.
|
|
|
|
|
// Arrange for it to run on a cpu again.
|
|
|
|
|
// This is called only from the go syscall library, not
|
|
|
|
|
// from the low-level system calls used by the runtime.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func exitsyscall(dummy int32) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
_g_.m.locks++ // see comment in entersyscall
|
|
|
|
|
if getcallersp(unsafe.Pointer(&dummy)) > _g_.syscallsp {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("exitsyscall: syscall frame is no longer valid")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_g_.waitsince = 0
|
2015-04-17 00:21:30 -04:00
|
|
|
oldp := _g_.m.p.ptr()
|
2014-11-11 17:08:33 -05:00
|
|
|
if exitsyscallfast() {
|
|
|
|
|
if _g_.m.mcache == nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("lost mcache")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-04-17 00:21:30 -04:00
|
|
|
if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
|
2014-12-12 18:41:57 +01:00
|
|
|
systemstack(traceGoStart)
|
|
|
|
|
}
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
// There's a cpu for us, so we can run.
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.ptr().syscalltick++
|
2014-11-11 17:08:33 -05:00
|
|
|
// We need to cas the status and scan before resuming...
|
|
|
|
|
casgstatus(_g_, _Gsyscall, _Grunning)
|
|
|
|
|
|
|
|
|
|
// Garbage collector isn't running (since we are),
|
|
|
|
|
// so okay to clear syscallsp.
|
|
|
|
|
_g_.syscallsp = 0
|
|
|
|
|
_g_.m.locks--
|
|
|
|
|
if _g_.preempt {
|
|
|
|
|
// restore the preemption request in case we've cleared it in newstack
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = stackPreempt
|
2014-11-11 17:08:33 -05:00
|
|
|
} else {
|
|
|
|
|
// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = _g_.stack.lo + _StackGuard
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
_g_.throwsplit = false
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-20 15:57:52 +03:00
|
|
|
_g_.sysexitticks = 0
|
2015-07-23 14:01:03 -04:00
|
|
|
_g_.sysexitseq = 0
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-06-11 16:49:38 +03:00
|
|
|
// Wait till traceGoSysBlock event is emitted.
|
2014-12-12 18:41:57 +01:00
|
|
|
// This ensures consistency of the trace (the goroutine is started after it is blocked).
|
|
|
|
|
for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
|
|
|
|
|
osyield()
|
|
|
|
|
}
|
2015-04-10 18:32:40 +03:00
|
|
|
// We can't trace syscall exit right now because we don't have a P.
|
|
|
|
|
// Tracing code can invoke write barriers that cannot run without a P.
|
|
|
|
|
// So instead we remember the syscall exit time and emit the event
|
2015-04-20 15:57:52 +03:00
|
|
|
// in execute when we have a P.
|
2015-07-23 14:01:03 -04:00
|
|
|
_g_.sysexitseq, _g_.sysexitticks = tracestamp()
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.m.locks--
|
|
|
|
|
|
|
|
|
|
// Call the scheduler.
|
|
|
|
|
mcall(exitsyscall0)
|
|
|
|
|
|
|
|
|
|
if _g_.m.mcache == nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("lost mcache")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Scheduler returned, so we're allowed to run now.
|
|
|
|
|
// Delete the syscallsp information that we left for
|
|
|
|
|
// the garbage collector during the system call.
|
|
|
|
|
// Must wait until now because until gosched returns
|
|
|
|
|
// we don't know for sure that the garbage collector
|
|
|
|
|
// is not running.
|
|
|
|
|
_g_.syscallsp = 0
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.ptr().syscalltick++
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.throwsplit = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func exitsyscallfast() bool {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
// Freezetheworld sets stopwait but does not retake P's.
|
2015-03-27 16:11:11 -04:00
|
|
|
if sched.stopwait == freezeStopWait {
|
2014-11-15 08:00:38 -05:00
|
|
|
_g_.m.mcache = nil
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try to re-acquire the last P.
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 && _g_.m.p.ptr().status == _Psyscall && cas(&_g_.m.p.ptr().status, _Psyscall, _Prunning) {
|
2014-11-11 17:08:33 -05:00
|
|
|
// There's a cpu for us, so we can run.
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.mcache = _g_.m.p.ptr().mcache
|
|
|
|
|
_g_.m.p.ptr().m.set(_g_.m)
|
|
|
|
|
if _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
|
|
|
|
// The p was retaken and then entered a syscall again (since _g_.m.syscalltick has changed).
|
|
|
|
|
// traceGoSysBlock for this syscall was already emitted,
|
|
|
|
|
// but here we effectively retake the p from the new syscall running on the same p.
|
|
|
|
|
systemstack(func() {
|
|
|
|
|
// Denote blocking of the new syscall.
|
2015-04-17 00:21:30 -04:00
|
|
|
traceGoSysBlock(_g_.m.p.ptr())
|
2014-12-12 18:41:57 +01:00
|
|
|
// Denote completion of the current syscall.
|
2015-07-23 14:01:03 -04:00
|
|
|
traceGoSysExit(tracestamp())
|
2014-12-12 18:41:57 +01:00
|
|
|
})
|
|
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.ptr().syscalltick++
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try to get any other idle P.
|
2015-04-17 00:21:30 -04:00
|
|
|
oldp := _g_.m.p.ptr()
|
2014-11-15 08:00:38 -05:00
|
|
|
_g_.m.mcache = nil
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p = 0
|
|
|
|
|
if sched.pidle != 0 {
|
2014-11-12 14:54:31 -05:00
|
|
|
var ok bool
|
|
|
|
|
systemstack(func() {
|
|
|
|
|
ok = exitsyscallfast_pidle()
|
2014-12-12 18:41:57 +01:00
|
|
|
if ok && trace.enabled {
|
|
|
|
|
if oldp != nil {
|
2015-06-11 16:49:38 +03:00
|
|
|
// Wait till traceGoSysBlock event is emitted.
|
2014-12-12 18:41:57 +01:00
|
|
|
// This ensures consistency of the trace (the goroutine is started after it is blocked).
|
|
|
|
|
for oldp.syscalltick == _g_.m.syscalltick {
|
|
|
|
|
osyield()
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-07-23 14:01:03 -04:00
|
|
|
traceGoSysExit(tracestamp())
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2014-11-12 14:54:31 -05:00
|
|
|
})
|
|
|
|
|
if ok {
|
2014-11-11 17:08:33 -05:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
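// The status cas above races with retake in sysmon: if the syscall ran long
// enough for sysmon to move the P out of _Psyscall and hand it off, the cas
// fails and we fall through to trying for an idle P instead. Comparing
// m.syscalltick (saved at entry) with p.syscalltick is how this path and the
// tracer detect that the P was taken and reused in the meantime.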
|
|
|
|
|
|
2014-11-12 14:54:31 -05:00
|
|
|
func exitsyscallfast_pidle() bool {
|
2014-11-11 17:08:33 -05:00
|
|
|
lock(&sched.lock)
|
|
|
|
|
_p_ := pidleget()
|
|
|
|
|
if _p_ != nil && atomicload(&sched.sysmonwait) != 0 {
|
|
|
|
|
atomicstore(&sched.sysmonwait, 0)
|
|
|
|
|
notewakeup(&sched.sysmonnote)
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if _p_ != nil {
|
|
|
|
|
acquirep(_p_)
|
2014-11-12 14:54:31 -05:00
|
|
|
return true
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2014-11-12 14:54:31 -05:00
|
|
|
return false
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// exitsyscall slow path on g0.
|
|
|
|
|
// Failed to acquire P, enqueue gp as runnable.
|
|
|
|
|
func exitsyscall0(gp *g) {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
casgstatus(gp, _Gsyscall, _Grunnable)
|
|
|
|
|
dropg()
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
_p_ := pidleget()
|
|
|
|
|
if _p_ == nil {
|
|
|
|
|
globrunqput(gp)
|
|
|
|
|
} else if atomicload(&sched.sysmonwait) != 0 {
|
|
|
|
|
atomicstore(&sched.sysmonwait, 0)
|
|
|
|
|
notewakeup(&sched.sysmonnote)
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
if _p_ != nil {
|
|
|
|
|
acquirep(_p_)
|
runtime: yield time slice to most recently readied G
Currently, when the runtime ready()s a G, it adds it to the end of the
current P's run queue and continues running. If there are many other
things in the run queue, this can result in a significant delay before
the ready()d G actually runs and can hurt fairness when other Gs in
the run queue are CPU hogs. For example, if there are three Gs sharing
a P, one of which is a CPU hog that never voluntarily gives up the P
and the other two of which are doing small amounts of work and
communicating back and forth on an unbuffered channel, the two
communicating Gs will get very little CPU time.
Change this so that when G1 ready()s G2 and then blocks, the scheduler
immediately hands off the remainder of G1's time slice to G2. In the
above example, the two communicating Gs will now act as a unit and
together get half of the CPU time, while the CPU hog gets the other
half of the CPU time.
This fixes the problem demonstrated by the ping-pong benchmark added
in the previous commit:
benchmark old ns/op new ns/op delta
BenchmarkPingPongHog 684287 825 -99.88%
On the x/benchmarks suite, this change improves the performance of
garbage by ~6% (for GOMAXPROCS=1 and 4), and json by 28% and 36% for
GOMAXPROCS=1 and 4. It has negligible effect on heap size.
This has no effect on the go1 benchmark suite since those benchmarks
are mostly single-threaded.
Change-Id: I858a08eaa78f702ea98a5fac99d28a4ac91d339f
Reviewed-on: https://go-review.googlesource.com/9289
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-22 14:42:26 -04:00
|
|
|
execute(gp, false) // Never returns.
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if _g_.m.lockedg != nil {
|
|
|
|
|
// Wait until another thread schedules gp and so m again.
|
|
|
|
|
stoplockedm()
|
2015-04-22 14:42:26 -04:00
|
|
|
execute(gp, false) // Never returns.
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
stopm()
|
|
|
|
|
schedule() // Never returns.
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func beforefork() {
|
|
|
|
|
gp := getg().m.curg
|
|
|
|
|
|
|
|
|
|
// Fork can hang if preempted with signals frequently enough (see issue 5517).
|
|
|
|
|
// Ensure that we stay on the same M where we disable profiling.
|
|
|
|
|
gp.m.locks++
|
|
|
|
|
if gp.m.profilehz != 0 {
|
|
|
|
|
resetcpuprofiler(0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// This function is called before fork in syscall package.
|
|
|
|
|
// Code between fork and exec must not allocate memory nor even try to grow stack.
|
|
|
|
|
// Here we spoil g->_StackGuard to reliably detect any attempts to grow stack.
|
|
|
|
|
// runtime_AfterFork will undo this in parent process, but not in child.
|
2015-01-05 16:29:21 +00:00
|
|
|
gp.stackguard0 = stackFork
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Called from syscall package before fork.
|
2014-12-22 13:27:53 -05:00
|
|
|
//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
|
2014-11-11 17:08:33 -05:00
|
|
|
//go:nosplit
|
2014-12-22 13:27:53 -05:00
|
|
|
func syscall_runtime_BeforeFork() {
|
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(beforefork)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func afterfork() {
|
|
|
|
|
gp := getg().m.curg
|
|
|
|
|
|
|
|
|
|
// See the comment in beforefork.
|
2015-01-05 16:29:21 +00:00
|
|
|
gp.stackguard0 = gp.stack.lo + _StackGuard
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
hz := sched.profilehz
|
|
|
|
|
if hz != 0 {
|
|
|
|
|
resetcpuprofiler(hz)
|
|
|
|
|
}
|
|
|
|
|
gp.m.locks--
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Called from syscall package after fork in parent.
|
2014-12-22 13:27:53 -05:00
|
|
|
//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
|
2014-11-11 17:08:33 -05:00
|
|
|
//go:nosplit
|
2014-12-22 13:27:53 -05:00
|
|
|
func syscall_runtime_AfterFork() {
|
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(afterfork)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
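// A rough sketch of the expected caller in the syscall package (simplified
// from its fork/exec path; rawFork is a stand-in for the raw fork or clone
// call):
//
//	runtime_BeforeFork()   // the linknamed function above
//	pid := rawFork()       // in the child, exec (or exit) follows without returning here
//	runtime_AfterFork()    // reached only in the parent, restoring guard and profiling
//
// The child must not allocate or grow the stack, which is why beforefork arms
// stackFork as the stack guard.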
|
|
|
|
|
|
|
|
|
|
// Allocate a new g, with a stack big enough for stacksize bytes.
|
|
|
|
|
func malg(stacksize int32) *g {
|
2015-02-03 11:20:58 +03:00
|
|
|
newg := new(g)
|
2014-11-11 17:08:33 -05:00
|
|
|
if stacksize >= 0 {
|
|
|
|
|
stacksize = round2(_StackSystem + stacksize)
|
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(func() {
|
2015-05-20 16:16:04 -04:00
|
|
|
newg.stack, newg.stkbar = stackalloc(uint32(stacksize))
|
2014-11-11 17:08:33 -05:00
|
|
|
})
|
2015-01-05 16:29:21 +00:00
|
|
|
newg.stackguard0 = newg.stack.lo + _StackGuard
|
|
|
|
|
newg.stackguard1 = ^uintptr(0)
|
2015-05-20 15:29:53 -04:00
|
|
|
newg.stackAlloc = uintptr(stacksize)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
return newg
|
|
|
|
|
}
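// malg rounds the request up to a power of two (after adding the
// _StackSystem reservation) because stackalloc hands out stacks from fixed
// power-of-two size classes. stackguard1 is set to ^uintptr(0) so that the C
// stack-growth check, which only g0 and gsignal are meant to use, trips and
// crashes immediately if it is ever consulted on an ordinary goroutine stack.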
|
|
|
|
|
|
|
|
|
|
// Create a new g running fn with siz bytes of arguments.
|
|
|
|
|
// Put it on the queue of g's waiting to run.
|
|
|
|
|
// The compiler turns a go statement into a call to this.
|
|
|
|
|
// Cannot split the stack because it assumes that the arguments
|
|
|
|
|
// are available sequentially after &fn; they would not be
|
|
|
|
|
// copied if a stack split occurred.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func newproc(siz int32, fn *funcval) {
|
|
|
|
|
argp := add(unsafe.Pointer(&fn), ptrSize)
|
|
|
|
|
pc := getcallerpc(unsafe.Pointer(&siz))
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
Scalararg and ptrarg are not "signal safe".
Go code filling them out can be interrupted by a signal,
and then the signal handler runs, and if it also ends up
in Go code that uses scalararg or ptrarg, now the old
values have been smashed.
For the pieces of code that do need to run in a signal handler,
we introduced onM_signalok, which is really just onM
except that the _signalok is meant to convey that the caller
asserts that scalarg and ptrarg will be restored to their old
values after the call (instead of the usual behavior, zeroing them).
Scalararg and ptrarg are also untyped and therefore error-prone.
Go code can always pass a closure instead of using scalararg
and ptrarg; they were only really necessary for C code.
And there's no more C code.
For all these reasons, delete scalararg and ptrarg, converting
the few remaining references to use closures.
Once those are gone, there is no need for a distinction between
onM and onM_signalok, so replace both with a single function
equivalent to the current onM_signalok (that is, it can be called
on any of the curg, g0, and gsignal stacks).
The name onM and the phrase 'm stack' are misnomers,
because on most systems an M has two system stacks:
the main thread stack and the signal handling stack.
Correct the misnomer by naming the replacement function systemstack.
Fix a few references to "M stack" in code.
The main motivation for this change is to eliminate scalararg/ptrarg.
Rick and I have already seen them cause problems because
the calling sequence m.ptrarg[0] = p is a heap pointer assignment,
so it gets a write barrier. The write barrier also uses onM, so it has
all the same problems as if it were being invoked by a signal handler.
We worked around this by saving and restoring the old values
and by calling onM_signalok, but there's no point in keeping this nice
home for bugs around any longer.
This CL also changes funcline to return the file name as a result
instead of filling in a passed-in *string. (The *string signature is
left over from when the code was written in and called from C.)
That's arguably an unrelated change, except that once I had done
the ptrarg/scalararg/onM cleanup I started getting false positives
about the *string argument escaping (not allowed in package runtime).
The compiler is wrong, but the easiest fix is to write the code like
Go code instead of like C code. I am a bit worried that the compiler
is wrong because of some use of uninitialized memory in the escape
analysis. If that's the reason, it will go away when we convert the
compiler to Go. (And if not, we'll debug it the next time.)
LGTM=khr
R=r, khr
CC=austin, golang-codereviews, iant, rlh
https://golang.org/cl/174950043
2014-11-12 14:54:31 -05:00
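A minimal sketch of the conversion this commit describes, written as ordinary user-level Go rather than runtime internals: an untyped package-level argument slot (standing in for m.scalararg/m.ptrarg) versus passing a typed closure to a helper. runOnWorker and growOld are hypothetical illustrations, not runtime functions.

package main

import "fmt"

// Old style: smuggle the argument through an untyped slot, the way
// scalararg/ptrarg did. The caller must fill the slot, make the call,
// and remember to clear it; a reentrant caller smashes the old value.
var ptrarg0 interface{}

func growOld() {
	buf := ptrarg0.(*[]byte)
	*buf = append(*buf, make([]byte, 16)...)
}

// New style: the argument travels inside a typed closure.
// runOnWorker is a stand-in for systemstack; here it just runs fn.
func runOnWorker(fn func()) { fn() }

func main() {
	buf := make([]byte, 4)

	ptrarg0 = &buf // old: fill the slot...
	growOld()
	ptrarg0 = nil // ...and clear it afterwards

	runOnWorker(func() { // new: capture what you need, no shared slot
		buf = append(buf, make([]byte, 16)...)
	})
	fmt.Println(len(buf)) // 36
}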
|
|
|
systemstack(func() {
|
2014-11-11 17:08:33 -05:00
|
|
|
newproc1(fn, (*uint8)(argp), siz, 0, pc)
|
|
|
|
|
})
|
|
|
|
|
}
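For orientation, a user-level sketch of what feeds this path: each go statement below is lowered by the compiler into a newproc call carrying the argument block size and a *funcval for the started function. The sketch itself is plain Go; nothing in it names actual compiler output.

package main

import (
	"fmt"
	"sync"
)

func greet(who string, wg *sync.WaitGroup) {
	defer wg.Done()
	fmt.Println("hello,", who)
}

func main() {
	var wg sync.WaitGroup
	wg.Add(2)
	go greet("gopher", &wg) // lowered to newproc(siz, fn) with the args laid out after fn
	go func() {             // a closure works the same way; the closure value is the funcval
		defer wg.Done()
		fmt.Println("hello from a closure")
	}()
	wg.Wait()
}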
|
|
|
|
|
|
|
|
|
|
// Create a new g running fn with narg bytes of arguments starting
|
|
|
|
|
// at argp and returning nret bytes of results. callerpc is the
|
|
|
|
|
// address of the go statement that created this. The new g is put
|
|
|
|
|
// on the queue of g's waiting to run.
|
|
|
|
|
func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
|
|
|
|
if fn == nil {
|
|
|
|
|
_g_.m.throwing = -1 // do not dump full stacks
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("go of nil func value")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
_g_.m.locks++ // disable preemption because it can be holding p in a local var
|
|
|
|
|
siz := narg + nret
|
|
|
|
|
siz = (siz + 7) &^ 7
|
|
|
|
|
|
|
|
|
|
// We could allocate a larger initial stack if necessary.
|
|
|
|
|
// Not worth it: this is almost always an error.
|
|
|
|
|
// 4*sizeof(uintreg): extra space added below
|
|
|
|
|
// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
|
|
|
|
|
if siz >= _StackMin-4*regSize-regSize {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("newproc: function arguments too large for new goroutine")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_ := _g_.m.p.ptr()
|
2014-11-11 17:08:33 -05:00
|
|
|
newg := gfget(_p_)
|
|
|
|
|
if newg == nil {
|
|
|
|
|
newg = malg(_StackMin)
|
|
|
|
|
casgstatus(newg, _Gidle, _Gdead)
|
|
|
|
|
allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
|
|
|
|
|
}
|
|
|
|
|
if newg.stack.hi == 0 {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("newproc1: newg missing stack")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if readgstatus(newg) != _Gdead {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("newproc1: new g is not Gdead")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
2015-03-08 14:20:20 +01:00
|
|
|
totalSize := 4*regSize + uintptr(siz) // extra space in case of reads slightly beyond frame
|
|
|
|
|
if hasLinkRegister {
|
|
|
|
|
totalSize += ptrSize
|
|
|
|
|
}
|
|
|
|
|
totalSize += -totalSize & (spAlign - 1) // align to spAlign
|
|
|
|
|
sp := newg.stack.hi - totalSize
|
|
|
|
|
spArg := sp
|
2014-11-14 12:10:52 -05:00
|
|
|
if hasLinkRegister {
|
2014-11-11 17:08:33 -05:00
|
|
|
// caller's LR
|
|
|
|
|
*(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil
|
2015-03-08 14:20:20 +01:00
|
|
|
spArg += ptrSize
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-03-08 14:20:20 +01:00
|
|
|
memmove(unsafe.Pointer(spArg), unsafe.Pointer(argp), uintptr(narg))
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
|
|
|
|
|
newg.sched.sp = sp
|
|
|
|
|
newg.sched.pc = funcPC(goexit) + _PCQuantum // +PCQuantum so that previous instruction is in same function
|
2014-12-22 22:43:49 -05:00
|
|
|
newg.sched.g = guintptr(unsafe.Pointer(newg))
|
2014-11-11 17:08:33 -05:00
|
|
|
gostartcallfn(&newg.sched, fn)
|
|
|
|
|
newg.gopc = callerpc
|
2014-12-12 18:41:57 +01:00
|
|
|
newg.startpc = fn.fn
|
2014-11-11 17:08:33 -05:00
|
|
|
casgstatus(newg, _Gdead, _Grunnable)
|
|
|
|
|
|
|
|
|
|
if _p_.goidcache == _p_.goidcacheend {
|
|
|
|
|
// Sched.goidgen is the last allocated id;
|
|
|
|
|
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
|
|
|
|
|
// At startup sched.goidgen=0, so main goroutine receives goid=1.
|
|
|
|
|
_p_.goidcache = xadd64(&sched.goidgen, _GoidCacheBatch)
|
|
|
|
|
_p_.goidcache -= _GoidCacheBatch - 1
|
|
|
|
|
_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
|
|
|
|
|
}
|
|
|
|
|
newg.goid = int64(_p_.goidcache)
|
|
|
|
|
_p_.goidcache++
|
|
|
|
|
if raceenabled {
|
|
|
|
|
newg.racectx = racegostart(callerpc)
|
|
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoCreate(newg, newg.startpc)
|
|
|
|
|
}
|
runtime: yield time slice to most recently readied G
Currently, when the runtime ready()s a G, it adds it to the end of the
current P's run queue and continues running. If there are many other
things in the run queue, this can result in a significant delay before
the ready()d G actually runs and can hurt fairness when other Gs in
the run queue are CPU hogs. For example, if there are three Gs sharing
a P, one of which is a CPU hog that never voluntarily gives up the P
and the other two of which are doing small amounts of work and
communicating back and forth on an unbuffered channel, the two
communicating Gs will get very little CPU time.
Change this so that when G1 ready()s G2 and then blocks, the scheduler
immediately hands off the remainder of G1's time slice to G2. In the
above example, the two communicating Gs will now act as a unit and
together get half of the CPU time, while the CPU hog gets the other
half of the CPU time.
This fixes the problem demonstrated by the ping-pong benchmark added
in the previous commit:
benchmark old ns/op new ns/op delta
BenchmarkPingPongHog 684287 825 -99.88%
On the x/benchmarks suite, this change improves the performance of
garbage by ~6% (for GOMAXPROCS=1 and 4), and json by 28% and 36% for
GOMAXPROCS=1 and 4. It has negligible effect on heap size.
This has no effect on the go1 benchmark suite since those benchmarks
are mostly single-threaded.
Change-Id: I858a08eaa78f702ea98a5fac99d28a4ac91d339f
Reviewed-on: https://go-review.googlesource.com/9289
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-22 14:42:26 -04:00
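A rough, user-level reconstruction of the workload the commit message describes (not the benchmark from the CL): two goroutines ping-ponging on unbuffered channels while a third hogs the CPU. With the runnext handoff used in runqput below, the readied partner runs next instead of queueing behind the hog; try it with GOMAXPROCS=1.

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)

	stop := make(chan struct{})
	go func() { // CPU hog: never blocks voluntarily
		for {
			select {
			case <-stop:
				return
			default:
			}
		}
	}()

	ping, pong := make(chan struct{}), make(chan struct{})
	go func() { // partner: echoes every ping
		for range ping {
			pong <- struct{}{}
		}
	}()

	start := time.Now()
	for i := 0; i < 1000; i++ {
		ping <- struct{}{}
		<-pong
	}
	close(stop)
	fmt.Println("1000 round trips in", time.Since(start))
}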
|
|
|
runqput(_p_, newg, true)
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
|
|
|
|
|
wakep()
|
|
|
|
|
}
|
|
|
|
|
_g_.m.locks--
|
|
|
|
|
if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
|
2015-01-05 16:29:21 +00:00
|
|
|
_g_.stackguard0 = stackPreempt
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
return newg
|
|
|
|
|
}
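A standalone sketch of the goidcache batching used above (sched.goidgen plus a per-P window): each worker claims a batch of IDs with one atomic add and then hands them out with no further synchronization. The names here are illustrative, not the runtime's.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

const idCacheBatch = 16 // like _GoidCacheBatch

var idgen uint64 // last allocated id, like sched.goidgen

type worker struct {
	cache, cacheEnd uint64 // like _p_.goidcache / _p_.goidcacheend
}

func (w *worker) nextID() uint64 {
	if w.cache == w.cacheEnd {
		// One atomic add claims the batch [old+1, old+idCacheBatch].
		w.cache = atomic.AddUint64(&idgen, idCacheBatch) - idCacheBatch + 1
		w.cacheEnd = w.cache + idCacheBatch
	}
	id := w.cache
	w.cache++
	return id
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			w := new(worker)
			for j := 0; j < 40; j++ {
				_ = w.nextID()
			}
		}()
	}
	wg.Wait()
	fmt.Println("last allocated id:", atomic.LoadUint64(&idgen)) // 192: 4 workers x 3 batches
}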
|
|
|
|
|
|
|
|
|
|
// Put on gfree list.
|
|
|
|
|
// If local list is too long, transfer a batch to the global list.
|
|
|
|
|
func gfput(_p_ *p, gp *g) {
|
|
|
|
|
if readgstatus(gp) != _Gdead {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("gfput: bad status (not Gdead)")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
2015-05-20 15:29:53 -04:00
|
|
|
stksize := gp.stackAlloc
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
if stksize != _FixedStack {
|
|
|
|
|
// non-standard stack size - free it.
|
2015-05-20 15:29:53 -04:00
|
|
|
stackfree(gp.stack, gp.stackAlloc)
|
2014-11-11 17:08:33 -05:00
|
|
|
gp.stack.lo = 0
|
|
|
|
|
gp.stack.hi = 0
|
2015-01-05 16:29:21 +00:00
|
|
|
gp.stackguard0 = 0
|
2015-05-20 16:16:04 -04:00
|
|
|
gp.stkbar = nil
|
|
|
|
|
gp.stkbarPos = 0
|
2015-06-24 17:13:24 -04:00
|
|
|
} else {
|
|
|
|
|
// Reset stack barriers.
|
|
|
|
|
gp.stkbar = gp.stkbar[:0]
|
|
|
|
|
gp.stkbarPos = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
2015-04-17 00:21:30 -04:00
|
|
|
gp.schedlink.set(_p_.gfree)
|
2014-11-11 17:08:33 -05:00
|
|
|
_p_.gfree = gp
|
|
|
|
|
_p_.gfreecnt++
|
|
|
|
|
if _p_.gfreecnt >= 64 {
|
|
|
|
|
lock(&sched.gflock)
|
|
|
|
|
for _p_.gfreecnt >= 32 {
|
|
|
|
|
_p_.gfreecnt--
|
|
|
|
|
gp = _p_.gfree
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_.gfree = gp.schedlink.ptr()
|
|
|
|
|
gp.schedlink.set(sched.gfree)
|
2014-11-11 17:08:33 -05:00
|
|
|
sched.gfree = gp
|
|
|
|
|
sched.ngfree++
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.gflock)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get from gfree list.
|
|
|
|
|
// If local list is empty, grab a batch from global list.
|
|
|
|
|
func gfget(_p_ *p) *g {
|
|
|
|
|
retry:
|
|
|
|
|
gp := _p_.gfree
|
|
|
|
|
if gp == nil && sched.gfree != nil {
|
|
|
|
|
lock(&sched.gflock)
|
|
|
|
|
for _p_.gfreecnt < 32 && sched.gfree != nil {
|
|
|
|
|
_p_.gfreecnt++
|
|
|
|
|
gp = sched.gfree
|
2015-04-17 00:21:30 -04:00
|
|
|
sched.gfree = gp.schedlink.ptr()
|
2014-11-11 17:08:33 -05:00
|
|
|
sched.ngfree--
|
2015-04-17 00:21:30 -04:00
|
|
|
gp.schedlink.set(_p_.gfree)
|
2014-11-11 17:08:33 -05:00
|
|
|
_p_.gfree = gp
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.gflock)
|
|
|
|
|
goto retry
|
|
|
|
|
}
|
|
|
|
|
if gp != nil {
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_.gfree = gp.schedlink.ptr()
|
2014-11-11 17:08:33 -05:00
|
|
|
_p_.gfreecnt--
|
|
|
|
|
if gp.stack.lo == 0 {
|
|
|
|
|
// Stack was deallocated in gfput. Allocate a new one.
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(func() {
|
2015-05-20 16:16:04 -04:00
|
|
|
gp.stack, gp.stkbar = stackalloc(_FixedStack)
|
2014-11-11 17:08:33 -05:00
|
|
|
})
|
2015-01-05 16:29:21 +00:00
|
|
|
gp.stackguard0 = gp.stack.lo + _StackGuard
|
2015-05-20 15:29:53 -04:00
|
|
|
gp.stackAlloc = _FixedStack
|
2014-11-11 17:08:33 -05:00
|
|
|
} else {
|
|
|
|
|
if raceenabled {
|
2015-05-20 15:29:53 -04:00
|
|
|
racemalloc(unsafe.Pointer(gp.stack.lo), gp.stackAlloc)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return gp
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Purge all cached G's from gfree list to the global list.
|
|
|
|
|
func gfpurge(_p_ *p) {
|
|
|
|
|
lock(&sched.gflock)
|
|
|
|
|
for _p_.gfreecnt != 0 {
|
|
|
|
|
_p_.gfreecnt--
|
|
|
|
|
gp := _p_.gfree
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_.gfree = gp.schedlink.ptr()
|
|
|
|
|
gp.schedlink.set(sched.gfree)
|
2014-11-11 17:08:33 -05:00
|
|
|
sched.gfree = gp
|
|
|
|
|
sched.ngfree++
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.gflock)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Breakpoint executes a breakpoint trap.
|
|
|
|
|
func Breakpoint() {
|
|
|
|
|
breakpoint()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dolockOSThread is called by LockOSThread and lockOSThread below
|
|
|
|
|
// after they modify m.locked. Do not allow preemption during this call,
|
|
|
|
|
// or else the m might be different in this function than in the caller.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func dolockOSThread() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
_g_.m.lockedg = _g_
|
|
|
|
|
_g_.lockedm = _g_.m
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
|
|
|
|
|
|
// LockOSThread wires the calling goroutine to its current operating system thread.
|
|
|
|
|
// Until the calling goroutine exits or calls UnlockOSThread, it will always
|
|
|
|
|
// execute in that thread, and no other goroutine can.
|
|
|
|
|
func LockOSThread() {
|
|
|
|
|
getg().m.locked |= _LockExternal
|
|
|
|
|
dolockOSThread()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func lockOSThread() {
|
|
|
|
|
getg().m.locked += _LockInternal
|
|
|
|
|
dolockOSThread()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
|
|
|
|
|
// after they update m->locked. Do not allow preemption during this call,
|
|
|
|
|
// or else the m might be different in this function than in the caller.
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func dounlockOSThread() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
if _g_.m.locked != 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
_g_.m.lockedg = nil
|
|
|
|
|
_g_.lockedm = nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
|
|
|
|
|
|
// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
|
|
|
|
|
// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
|
|
|
|
|
func UnlockOSThread() {
|
|
|
|
|
getg().m.locked &^= _LockExternal
|
|
|
|
|
dounlockOSThread()
|
|
|
|
|
}
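A small usage sketch for the exported pair above: pin a goroutine to one OS thread for a call into a thread-affine API. callThreadAffineAPI is a hypothetical placeholder, not a real library call.

package main

import (
	"fmt"
	"runtime"
)

func callThreadAffineAPI() { fmt.Println("running on a pinned OS thread") }

func worker(done chan<- struct{}) {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread() // unwire once thread-local state no longer matters
	callThreadAffineAPI()
	done <- struct{}{}
}

func main() {
	done := make(chan struct{})
	go worker(done)
	<-done
}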
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
|
|
|
|
func unlockOSThread() {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
if _g_.m.locked < _LockInternal {
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
2014-11-12 14:54:31 -05:00
|
|
|
systemstack(badunlockosthread)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
_g_.m.locked -= _LockInternal
|
|
|
|
|
dounlockOSThread()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func badunlockosthread() {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func gcount() int32 {
|
|
|
|
|
n := int32(allglen) - sched.ngfree
|
|
|
|
|
for i := 0; ; i++ {
|
|
|
|
|
_p_ := allp[i]
|
|
|
|
|
if _p_ == nil {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
n -= _p_.gfreecnt
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// All these variables can be changed concurrently, so the result can be inconsistent.
|
|
|
|
|
// But at least the current goroutine is running.
|
|
|
|
|
if n < 1 {
|
|
|
|
|
n = 1
|
|
|
|
|
}
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func mcount() int32 {
|
|
|
|
|
return sched.mcount
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var prof struct {
|
|
|
|
|
lock uint32
|
|
|
|
|
hz int32
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func _System() { _System() }
|
|
|
|
|
func _ExternalCode() { _ExternalCode() }
|
|
|
|
|
func _GC() { _GC() }
|
|
|
|
|
|
|
|
|
|
// Called if we receive a SIGPROF signal.
|
2015-02-25 14:41:21 +09:00
|
|
|
func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
|
2014-11-11 17:08:33 -05:00
|
|
|
if prof.hz == 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Profiling runs concurrently with GC, so it must not allocate.
|
|
|
|
|
mp.mallocing++
|
|
|
|
|
|
2015-07-28 14:33:39 -04:00
|
|
|
// Coordinate with stack barrier insertion in scanstack.
|
|
|
|
|
for !cas(&gp.stackLock, 0, 1) {
|
|
|
|
|
osyield()
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// Define that a "user g" is a user-created goroutine, and a "system g"
|
2015-04-30 15:32:54 +01:00
|
|
|
// is one that is m->g0 or m->gsignal.
|
2014-11-11 17:08:33 -05:00
|
|
|
//
|
2015-04-30 15:32:54 +01:00
|
|
|
// We might be interrupted for profiling halfway through a
|
2014-11-11 17:08:33 -05:00
|
|
|
// goroutine switch. The switch involves updating three (or four) values:
|
|
|
|
|
// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
|
|
|
|
|
// because once it gets updated the new g is running.
|
|
|
|
|
//
|
|
|
|
|
// When switching from a user g to a system g, LR is not considered live,
|
|
|
|
|
// so the update only affects g, SP, and PC. Since PC must be last,
|
|
|
|
|
// the possible partial transitions in ordinary execution are (1) g alone is updated,
|
|
|
|
|
// (2) both g and SP are updated, and (3) SP alone is updated.
|
2015-04-30 15:32:54 +01:00
|
|
|
// If SP or g alone is updated, we can detect the partial transition by checking
|
2014-11-11 17:08:33 -05:00
|
|
|
// whether the SP is within g's stack bounds. (We could also require that SP
|
|
|
|
|
// be changed only after g, but the stack bounds check is needed by other
|
|
|
|
|
// cases, so there is no need to impose an additional requirement.)
|
|
|
|
|
//
|
|
|
|
|
// There is one exceptional transition to a system g, not in ordinary execution.
|
|
|
|
|
// When a signal arrives, the operating system starts the signal handler running
|
|
|
|
|
// with an updated PC and SP. The g is updated last, at the beginning of the
|
|
|
|
|
// handler. There are two reasons this is okay. First, until g is updated the
|
|
|
|
|
// g and SP do not match, so the stack bounds check detects the partial transition.
|
|
|
|
|
// Second, signal handlers currently run with signals disabled, so a profiling
|
|
|
|
|
// signal cannot arrive during the handler.
|
|
|
|
|
//
|
|
|
|
|
// When switching from a system g to a user g, there are three possibilities.
|
|
|
|
|
//
|
|
|
|
|
// First, it may be that the g switch has no PC update, because the SP
|
|
|
|
|
// either corresponds to a user g throughout (as in asmcgocall)
|
|
|
|
|
// or because it has been arranged to look like a user g frame
|
|
|
|
|
// (as in cgocallback_gofunc). In this case, since the entire
|
|
|
|
|
// transition is a g+SP update, a partial transition updating just one of
|
|
|
|
|
// those will be detected by the stack bounds check.
|
|
|
|
|
//
|
|
|
|
|
// Second, when returning from a signal handler, the PC and SP updates
|
|
|
|
|
// are performed by the operating system in an atomic update, so the g
|
|
|
|
|
// update must be done before them. The stack bounds check detects
|
|
|
|
|
// the partial transition here, and (again) signal handlers run with signals
|
|
|
|
|
// disabled, so a profiling signal cannot arrive then anyway.
|
|
|
|
|
//
|
|
|
|
|
// Third, the common case: it may be that the switch updates g, SP, and PC
|
2015-04-30 15:32:54 +01:00
|
|
|
// separately. If the PC is within any of the functions that do this,
|
|
|
|
|
// we don't ask for a traceback. See the function setsSP for more about this.
|
2014-11-11 17:08:33 -05:00
|
|
|
//
|
|
|
|
|
// There is another apparently viable approach, recorded here in case
|
2015-04-30 15:32:54 +01:00
|
|
|
// the "PC within setsSP function" check turns out not to be usable.
|
2014-11-11 17:08:33 -05:00
|
|
|
// It would be possible to delay the update of either g or SP until immediately
|
|
|
|
|
// before the PC update instruction. Then, because of the stack bounds check,
|
|
|
|
|
// the only problematic interrupt point is just before that PC update instruction,
|
|
|
|
|
// and the sigprof handler can detect that instruction and simulate stepping past
|
|
|
|
|
// it in order to reach a consistent state. On ARM, the update of g must be made
|
|
|
|
|
// in two places (in R10 and also in a TLS slot), so the delayed update would
|
|
|
|
|
// need to be the SP update. The sigprof handler must read the instruction at
|
|
|
|
|
// the current PC and if it was the known instruction (for example, JMP BX or
|
|
|
|
|
// MOV R2, PC), use that other register in place of the PC value.
|
|
|
|
|
// The biggest drawback to this solution is that it requires that we can tell
|
|
|
|
|
// whether it's safe to read from the memory pointed at by PC.
|
|
|
|
|
// In a correct program, we can test PC == nil and otherwise read,
|
|
|
|
|
// but if a profiling signal happens at the instant that a program executes
|
|
|
|
|
// a bad jump (before the program manages to handle the resulting fault)
|
|
|
|
|
// the profiling handler could fault trying to read nonexistent memory.
|
|
|
|
|
//
|
|
|
|
|
// To recap, there are no constraints on the assembly being used for the
|
|
|
|
|
// transition. We simply require that g and SP match and that the PC is not
|
|
|
|
|
// in gogo.
|
2015-02-25 14:41:21 +09:00
|
|
|
traceback := true
|
2015-04-30 15:32:54 +01:00
|
|
|
if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) {
|
2014-11-11 17:08:33 -05:00
|
|
|
traceback = false
|
|
|
|
|
}
|
2015-02-25 14:41:21 +09:00
|
|
|
var stk [maxCPUProfStack]uintptr
|
|
|
|
|
n := 0
|
2015-04-30 15:32:54 +01:00
|
|
|
if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
|
|
|
|
|
// Cgo: we can't unwind and symbolize arbitrary C code,
|
|
|
|
|
// so instead collect Go stack that leads to the cgo call.
|
|
|
|
|
// This is especially important on windows, since all syscalls are cgo calls.
|
|
|
|
|
n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
|
|
|
|
|
} else if traceback {
|
|
|
|
|
n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap|_TraceJumpStack)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if !traceback || n <= 0 {
|
|
|
|
|
// Normal traceback is impossible or has failed.
|
|
|
|
|
// See if it falls into several common cases.
|
|
|
|
|
n = 0
|
2015-04-17 00:21:30 -04:00
|
|
|
if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
// Libcall, i.e. runtime syscall on windows.
|
|
|
|
|
// Collect Go stack that leads to the call.
|
2015-04-17 00:21:30 -04:00
|
|
|
n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
if n == 0 {
|
|
|
|
|
// If all of the above has failed, account it against abstract "System" or "GC".
|
|
|
|
|
n = 2
|
|
|
|
|
// "ExternalCode" is better than "etext".
|
2015-04-07 12:55:02 +12:00
|
|
|
if pc > firstmoduledata.etext {
|
2015-02-25 14:41:21 +09:00
|
|
|
pc = funcPC(_ExternalCode) + _PCQuantum
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-02-25 14:41:21 +09:00
|
|
|
stk[0] = pc
|
2015-01-30 15:30:41 -05:00
|
|
|
if mp.preemptoff != "" || mp.helpgc != 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
stk[1] = funcPC(_GC) + _PCQuantum
|
|
|
|
|
} else {
|
|
|
|
|
stk[1] = funcPC(_System) + _PCQuantum
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-07-28 14:33:39 -04:00
|
|
|
atomicstore(&gp.stackLock, 0)
|
2014-11-11 17:08:33 -05:00
|
|
|
|
|
|
|
|
if prof.hz != 0 {
|
|
|
|
|
// Simple cas-lock to coordinate with setcpuprofilerate.
|
|
|
|
|
for !cas(&prof.lock, 0, 1) {
|
|
|
|
|
osyield()
|
|
|
|
|
}
|
|
|
|
|
if prof.hz != 0 {
|
2015-02-25 14:41:21 +09:00
|
|
|
cpuprof.add(stk[:n])
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
atomicstore(&prof.lock, 0)
|
|
|
|
|
}
|
|
|
|
|
mp.mallocing--
|
|
|
|
|
}
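The prof.lock handshake in sigprof above is a tiny CAS spin lock. A user-level sketch of the same idiom, with sync/atomic standing in for the runtime's cas/atomicstore and runtime.Gosched approximating osyield:

package main

import (
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
)

var lock uint32 // 0 = unlocked, 1 = locked
var counter int

func withLock(fn func()) {
	for !atomic.CompareAndSwapUint32(&lock, 0, 1) {
		runtime.Gosched() // back off, like osyield in sigprof
	}
	fn()
	atomic.StoreUint32(&lock, 0)
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				withLock(func() { counter++ })
			}
		}()
	}
	wg.Wait()
	fmt.Println(counter) // 4000
}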
|
|
|
|
|
|
2015-04-30 15:32:54 +01:00
|
|
|
// Reports whether a function will set the SP
|
|
|
|
|
// to an absolute value. It is important that
|
|
|
|
|
// we don't traceback when these are at the bottom
|
|
|
|
|
// of the stack since we can't be sure that we will
|
|
|
|
|
// find the caller.
|
|
|
|
|
//
|
|
|
|
|
// If the function is not on the bottom of the stack
|
|
|
|
|
// we assume that it will have set it up so that traceback will be consistent,
|
|
|
|
|
// either by being a traceback terminating function
|
|
|
|
|
// or putting one on the stack at the right offset.
|
|
|
|
|
func setsSP(pc uintptr) bool {
|
|
|
|
|
f := findfunc(pc)
|
|
|
|
|
if f == nil {
|
|
|
|
|
// couldn't find the function for this PC,
|
|
|
|
|
// so assume the worst and stop traceback
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
switch f.entry {
|
|
|
|
|
case gogoPC, systemstackPC, mcallPC, morestackPC:
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// Arrange to call fn with a traceback hz times a second.
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
2014-11-12 14:54:31 -05:00
|
|
|
func setcpuprofilerate_m(hz int32) {
|
2014-11-11 17:08:33 -05:00
|
|
|
// Force sane arguments.
|
|
|
|
|
if hz < 0 {
|
|
|
|
|
hz = 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Disable preemption, otherwise we can be rescheduled to another thread
|
|
|
|
|
// that has profiling enabled.
|
[dev.cc] runtime: delete scalararg, ptrarg; rename onM to systemstack
2014-11-12 14:54:31 -05:00
|
|
|
_g_ := getg()
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.m.locks++
|
|
|
|
|
|
|
|
|
|
// Stop profiler on this thread so that it is safe to lock prof.
|
|
|
|
|
// If a profiling signal came in while we had prof locked,
|
|
|
|
|
// it would deadlock.
|
|
|
|
|
resetcpuprofiler(0)
|
|
|
|
|
|
|
|
|
|
for !cas(&prof.lock, 0, 1) {
|
|
|
|
|
osyield()
|
|
|
|
|
}
|
|
|
|
|
prof.hz = hz
|
|
|
|
|
atomicstore(&prof.lock, 0)
|
|
|
|
|
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
sched.profilehz = hz
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
|
|
|
|
|
if hz != 0 {
|
|
|
|
|
resetcpuprofiler(hz)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_g_.m.locks--
|
|
|
|
|
}
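How user code normally reaches this: runtime/pprof's StartCPUProfile enables SIGPROF delivery (100 Hz is its default rate), and the samples gathered in sigprof above land in the written profile. A minimal sketch:

package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func busy() {
	x := 0
	for i := 0; i < 1e8; i++ {
		x += i
	}
	_ = x
}

func main() {
	f, err := os.Create("cpu.pprof")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	if err := pprof.StartCPUProfile(f); err != nil { // turns on the CPU profile rate
		log.Fatal(err)
	}
	busy()
	pprof.StopCPUProfile() // sets the rate back to 0
}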
|
|
|
|
|
|
|
|
|
|
// Change number of processors. The world is stopped, sched is locked.
|
2014-11-15 08:00:38 -05:00
|
|
|
// gcworkbufs are not being modified by either the GC or
|
|
|
|
|
// the write barrier code.
|
2014-12-22 18:14:00 +03:00
|
|
|
// Returns list of Ps with local work, they need to be scheduled by the caller.
|
2015-02-03 11:20:58 +03:00
|
|
|
func procresize(nprocs int32) *p {
|
2014-11-11 17:08:33 -05:00
|
|
|
old := gomaxprocs
|
2015-02-03 11:20:58 +03:00
|
|
|
if old < 0 || old > _MaxGomaxprocs || nprocs <= 0 || nprocs > _MaxGomaxprocs {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("procresize: invalid arg")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-02-03 11:20:58 +03:00
|
|
|
traceGomaxprocs(nprocs)
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
|
2015-04-01 13:47:35 -04:00
|
|
|
// update statistics
|
|
|
|
|
now := nanotime()
|
|
|
|
|
if sched.procresizetime != 0 {
|
|
|
|
|
sched.totaltime += int64(old) * (now - sched.procresizetime)
|
|
|
|
|
}
|
|
|
|
|
sched.procresizetime = now
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// initialize new P's
|
2015-02-03 11:20:58 +03:00
|
|
|
for i := int32(0); i < nprocs; i++ {
|
|
|
|
|
pp := allp[i]
|
|
|
|
|
if pp == nil {
|
|
|
|
|
pp = new(p)
|
|
|
|
|
pp.id = i
|
|
|
|
|
pp.status = _Pgcstop
|
2015-02-03 00:33:02 +03:00
|
|
|
pp.sudogcache = pp.sudogbuf[:0]
|
2015-02-05 13:35:41 +00:00
|
|
|
for i := range pp.deferpool {
|
|
|
|
|
pp.deferpool[i] = pp.deferpoolbuf[i][:0]
|
|
|
|
|
}
|
2015-02-03 11:20:58 +03:00
|
|
|
atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-02-03 11:20:58 +03:00
|
|
|
if pp.mcache == nil {
|
2014-11-11 17:08:33 -05:00
|
|
|
if old == 0 && i == 0 {
|
|
|
|
|
if getg().m.mcache == nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("missing mcache?")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-02-03 11:20:58 +03:00
|
|
|
pp.mcache = getg().m.mcache // bootstrap
|
2014-11-11 17:08:33 -05:00
|
|
|
} else {
|
2015-02-03 11:20:58 +03:00
|
|
|
pp.mcache = allocmcache()
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-12-22 18:14:00 +03:00
|
|
|
// free unused P's
|
2015-02-03 11:20:58 +03:00
|
|
|
for i := nprocs; i < old; i++ {
|
2014-12-22 18:14:00 +03:00
|
|
|
p := allp[i]
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-04-17 00:21:30 -04:00
|
|
|
if p == getg().m.p.ptr() {
|
2014-12-12 18:41:57 +01:00
|
|
|
// moving to p[0], pretend that we were descheduled
|
|
|
|
|
// and then scheduled again to keep the trace sane.
|
|
|
|
|
traceGoSched()
|
|
|
|
|
traceProcStop(p)
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-06-11 16:49:38 +03:00
|
|
|
// move all runnable goroutines to the global queue
|
2014-12-22 18:14:00 +03:00
|
|
|
for p.runqhead != p.runqtail {
|
2014-11-11 17:08:33 -05:00
|
|
|
// pop from tail of local queue
|
|
|
|
|
p.runqtail--
|
|
|
|
|
gp := p.runq[p.runqtail%uint32(len(p.runq))]
|
|
|
|
|
// push onto head of global queue
|
runtime: yield time slice to most recently readied G
2015-04-22 14:42:26 -04:00
|
|
|
globrunqputhead(gp)
|
|
|
|
|
}
|
|
|
|
|
if p.runnext != 0 {
|
|
|
|
|
globrunqputhead(p.runnext.ptr())
|
|
|
|
|
p.runnext = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
runtime: multi-threaded, utilization-scheduled background mark
Currently, the concurrent mark phase is performed by the main GC
goroutine. Prior to the previous commit enabling preemption, this
caused marking to always consume 1/GOMAXPROCS of the available CPU
time. If GOMAXPROCS=1, this meant background GC would consume 100% of
the CPU (effectively a STW). If GOMAXPROCS>4, background GC would use
less than the goal of 25%. If GOMAXPROCS=4, background GC would use
the goal 25%, but if the mutator wasn't using the remaining 75%,
background marking wouldn't take advantage of the idle time. Enabling
preemption in the previous commit made GC miss CPU targets in
completely different ways, but set us up to bring everything back in
line.
This change replaces the fixed GC goroutine with per-P background mark
goroutines. Once started, these goroutines don't go in the standard
run queues; instead, they are scheduled specially such that the time
spent in mutator assists and the background mark goroutines totals 25%
of the CPU time available to the program. Furthermore, this lets
background marking take advantage of idle Ps, which significantly
boosts GC performance for applications that under-utilize the CPU.
This requires also changing how time is reported for gctrace, so this
change splits the concurrent mark CPU time into assist/background/idle
scanning.
This also requires increasing the size of the StackRecord slice used
in a GoroutineProfile test.
Change-Id: I0936ff907d2cee6cb687a208f2df47e8988e3157
Reviewed-on: https://go-review.googlesource.com/8850
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-23 21:07:33 -04:00
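Illustrative arithmetic only (not the runtime's actual controller code): one way to split the 25% CPU goal described above into whole dedicated mark workers plus a fractional remainder that is scheduled opportunistically.

package main

import (
	"fmt"
	"math"
)

const gcGoalUtilization = 0.25

func markWorkerPlan(gomaxprocs int) (dedicated int, fractional float64) {
	target := gcGoalUtilization * float64(gomaxprocs)
	dedicated = int(math.Floor(target))
	fractional = target - float64(dedicated) // fraction of one P's time
	return
}

func main() {
	for _, n := range []int{1, 2, 4, 8} {
		d, f := markWorkerPlan(n)
		fmt.Printf("GOMAXPROCS=%d: %d dedicated worker(s) + %.2f of one P\n", n, d, f)
	}
}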
|
|
|
// if there's a background worker, make it runnable and put
|
|
|
|
|
// it on the global queue so it can clean itself up
|
|
|
|
|
if p.gcBgMarkWorker != nil {
|
|
|
|
|
casgstatus(p.gcBgMarkWorker, _Gwaiting, _Grunnable)
|
|
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoUnpark(p.gcBgMarkWorker, 0)
|
|
|
|
|
}
|
|
|
|
|
globrunqput(p.gcBgMarkWorker)
|
|
|
|
|
p.gcBgMarkWorker = nil
|
|
|
|
|
}
|
2015-03-04 21:24:58 +03:00
|
|
|
for i := range p.sudogbuf {
|
2015-02-03 00:33:02 +03:00
|
|
|
p.sudogbuf[i] = nil
|
|
|
|
|
}
|
|
|
|
|
p.sudogcache = p.sudogbuf[:0]
|
2015-02-05 13:35:41 +00:00
|
|
|
for i := range p.deferpool {
|
|
|
|
|
for j := range p.deferpoolbuf[i] {
|
|
|
|
|
p.deferpoolbuf[i][j] = nil
|
|
|
|
|
}
|
|
|
|
|
p.deferpool[i] = p.deferpoolbuf[i][:0]
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
freemcache(p.mcache)
|
|
|
|
|
p.mcache = nil
|
|
|
|
|
gfpurge(p)
|
2014-12-12 18:41:57 +01:00
|
|
|
traceProcFree(p)
|
2014-11-11 17:08:33 -05:00
|
|
|
p.status = _Pdead
|
|
|
|
|
// can't free P itself because it can be referenced by an M in syscall
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
_g_ := getg()
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
|
2014-12-22 18:14:00 +03:00
|
|
|
// continue to use the current P
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.ptr().status = _Prunning
|
2014-12-22 18:14:00 +03:00
|
|
|
} else {
|
|
|
|
|
// release the current P and acquire allp[0]
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 {
|
|
|
|
|
_g_.m.p.ptr().m = 0
|
2014-12-22 18:14:00 +03:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p = 0
|
2014-12-22 18:14:00 +03:00
|
|
|
_g_.m.mcache = nil
|
|
|
|
|
p := allp[0]
|
2015-04-17 00:21:30 -04:00
|
|
|
p.m = 0
|
2014-12-22 18:14:00 +03:00
|
|
|
p.status = _Pidle
|
|
|
|
|
acquirep(p)
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
|
|
|
|
traceGoStart()
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2014-12-22 18:14:00 +03:00
|
|
|
var runnablePs *p
|
2015-02-03 11:20:58 +03:00
|
|
|
for i := nprocs - 1; i >= 0; i-- {
|
2014-11-11 17:08:33 -05:00
|
|
|
p := allp[i]
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p.ptr() == p {
|
2014-12-22 18:14:00 +03:00
|
|
|
continue
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
p.status = _Pidle
|
2015-04-22 12:18:01 -04:00
|
|
|
if runqempty(p) {
|
2014-12-22 18:14:00 +03:00
|
|
|
pidleput(p)
|
|
|
|
|
} else {
|
2015-04-17 00:21:30 -04:00
|
|
|
p.m.set(mget())
|
|
|
|
|
p.link.set(runnablePs)
|
2014-12-22 18:14:00 +03:00
|
|
|
runnablePs = p
|
|
|
|
|
}
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
|
2015-02-03 11:20:58 +03:00
|
|
|
atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
|
2014-12-22 18:14:00 +03:00
|
|
|
return runnablePs
|
2014-11-11 17:08:33 -05:00
|
|
|
}
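For reference, the public path into procresize is runtime.GOMAXPROCS; a tiny user-level sketch (passing 0 only queries the current value, a positive value stops the world and resizes):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	fmt.Println("current:", runtime.GOMAXPROCS(0)) // query only, no resize
	prev := runtime.GOMAXPROCS(2)                  // stop the world, resize to 2 Ps
	fmt.Println("was:", prev, "now:", runtime.GOMAXPROCS(0))
}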
|
|
|
|
|
|
|
|
|
|
// Associate p and the current m.
|
2015-04-17 00:21:30 -04:00
|
|
|
func acquirep(_p_ *p) {
|
|
|
|
|
acquirep1(_p_)
|
|
|
|
|
|
|
|
|
|
// have p; write barriers now allowed
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
_g_.m.mcache = _p_.mcache
|
|
|
|
|
|
|
|
|
|
if trace.enabled {
|
|
|
|
|
traceProcStart()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
runtime: Remove write barriers during STW.
The GC assumes that there will be no asynchronous write barriers when
the world is stopped. This keeps the synchronization between write
barriers and the GC simple. However, currently, there are a few places
in runtime code where this assumption does not hold.
The GC stops the world by collecting all Ps, which stops all user Go
code, but small parts of the runtime can run without a P. For example,
the code that releases a P must still deschedule its G onto a runnable
queue before stopping. Similarly, when a G returns from a long-running
syscall, it must run code to reacquire a P.
Currently, this code can contain write barriers. This can lead to the
GC collecting reachable objects if something like the following
sequence of events happens:
1. GC stops the world by collecting all Ps.
2. G #1 returns from a syscall (for example), tries to install a
pointer to object X, and calls greyobject on X.
3. greyobject on G #1 marks X, but does not yet add it to a write
buffer. At this point, X is effectively black, not grey, even though
it may point to white objects.
4. GC reaches X through some other path and calls greyobject on X, but
greyobject does nothing because X is already marked.
5. GC completes.
6. greyobject on G #1 adds X to a work buffer, but it's too late.
7. Objects that were reachable only through X are incorrectly collected.
To fix this, we check the invariant that no asynchronous write
barriers happen when the world is stopped by checking that write
barriers always have a P, and modify all currently known sources of
these writes to disable the write barrier. In all modified cases this
is safe because the object in question will always be reachable via
some other path.
Some of the trace code was turned off, in particular the
code that traces returning from a syscall. The GC assumes
that as far as the heap is concerned the thread is stopped
when it is in a syscall. Upon returning the trace code
must not do any heap writes for the same reasons discussed
above.
Fixes #10098
Fixes #9953
Fixes #9951
Fixes #9884
May relate to #9610 #9771
Change-Id: Ic2e70b7caffa053e56156838eb8d89503e3c0c8a
Reviewed-on: https://go-review.googlesource.com/7504
Reviewed-by: Austin Clements <austin@google.com>
2015-03-12 14:19:21 -04:00
|
|
|
// May run during STW, so write barriers are not allowed.
|
|
|
|
|
//go:nowritebarrier
|
2015-04-17 00:21:30 -04:00
|
|
|
func acquirep1(_p_ *p) {
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_ := getg()
|
|
|
|
|
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p != 0 || _g_.m.mcache != nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("acquirep: already in go")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
if _p_.m != 0 || _p_.status != _Pidle {
|
2014-11-11 17:08:33 -05:00
|
|
|
id := int32(0)
|
2015-04-17 00:21:30 -04:00
|
|
|
if _p_.m != 0 {
|
|
|
|
|
id = _p_.m.ptr().id
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("acquirep: invalid p state")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p.set(_p_)
|
|
|
|
|
_p_.m.set(_g_.m)
|
2014-11-11 17:08:33 -05:00
|
|
|
_p_.status = _Prunning
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Disassociate p and the current m.
|
|
|
|
|
func releasep() *p {
|
|
|
|
|
_g_ := getg()
|
|
|
|
|
|
2015-04-17 00:21:30 -04:00
|
|
|
if _g_.m.p == 0 || _g_.m.mcache == nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("releasep: invalid arg")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_ := _g_.m.p.ptr()
|
|
|
|
|
if _p_.m.ptr() != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
|
|
|
|
|
print("releasep: m=", _g_.m, " m->p=", _g_.m.p.ptr(), " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("releasep: invalid p state")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
2014-12-12 18:41:57 +01:00
|
|
|
if trace.enabled {
|
2015-04-17 00:21:30 -04:00
|
|
|
traceProcStop(_g_.m.p.ptr())
|
2014-12-12 18:41:57 +01:00
|
|
|
}
|
2015-04-17 00:21:30 -04:00
|
|
|
_g_.m.p = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_.m.mcache = nil
|
2015-04-17 00:21:30 -04:00
|
|
|
_p_.m = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
_p_.status = _Pidle
|
|
|
|
|
return _p_
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func incidlelocked(v int32) {
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
sched.nmidlelocked += v
|
|
|
|
|
if v > 0 {
|
|
|
|
|
checkdead()
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check for deadlock situation.
|
|
|
|
|
// The check is based on the number of running M's; if that drops to 0, it is a deadlock.
|
|
|
|
|
func checkdead() {
|
2015-04-18 18:19:06 -07:00
|
|
|
// For -buildmode=c-shared or -buildmode=c-archive it's OK if
|
|
|
|
|
// there are no running goroutines. The calling program is
|
|
|
|
|
// assumed to be running.
|
|
|
|
|
if islibrary || isarchive {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-11 17:08:33 -05:00
|
|
|
// If we are dying because of a signal caught on an already idle thread,
|
|
|
|
|
// freezetheworld will cause all running threads to block.
|
|
|
|
|
// And the runtime will essentially enter a deadlock state,
|
|
|
|
|
// except that there is a thread that will call exit soon.
|
|
|
|
|
if panicking > 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// -1 for sysmon
|
|
|
|
|
run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
|
|
|
|
|
if run > 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if run < 0 {
|
|
|
|
|
print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("checkdead: inconsistent counts")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
grunning := 0
|
|
|
|
|
lock(&allglock)
|
|
|
|
|
for i := 0; i < len(allgs); i++ {
|
|
|
|
|
gp := allgs[i]
|
2015-02-07 15:31:18 +03:00
|
|
|
if isSystemGoroutine(gp) {
|
2014-11-11 17:08:33 -05:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
s := readgstatus(gp)
|
|
|
|
|
switch s &^ _Gscan {
|
|
|
|
|
case _Gwaiting:
|
|
|
|
|
grunning++
|
|
|
|
|
case _Grunnable,
|
|
|
|
|
_Grunning,
|
|
|
|
|
_Gsyscall:
|
|
|
|
|
unlock(&allglock)
|
|
|
|
|
print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("checkdead: runnable g")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
unlock(&allglock)
|
|
|
|
|
if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("no goroutines (main called runtime.Goexit) - deadlock!")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Maybe jump time forward for playground.
|
|
|
|
|
gp := timejump()
|
|
|
|
|
if gp != nil {
|
|
|
|
|
casgstatus(gp, _Gwaiting, _Grunnable)
|
|
|
|
|
globrunqput(gp)
|
|
|
|
|
_p_ := pidleget()
|
|
|
|
|
if _p_ == nil {
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("checkdead: no p for timer")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
mp := mget()
|
|
|
|
|
if mp == nil {
|
2015-02-12 10:18:31 +03:00
|
|
|
newm(nil, _p_)
|
2014-11-11 17:08:33 -05:00
|
|
|
} else {
|
2015-04-17 00:21:30 -04:00
|
|
|
mp.nextp.set(_p_)
|
2014-11-11 17:08:33 -05:00
|
|
|
notewakeup(&mp.park)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
getg().m.throwing = -1 // do not dump full stacks
|
2014-12-27 20:58:00 -08:00
|
|
|
throw("all goroutines are asleep - deadlock!")
|
2014-11-11 17:08:33 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func sysmon() {
|
|
|
|
|
// If we go two minutes without a garbage collection, force one to run.
|
|
|
|
|
forcegcperiod := int64(2 * 60 * 1e9)
|
|
|
|
|
|
|
|
|
|
// If a heap span goes unused for 5 minutes after a garbage collection,
|
|
|
|
|
// we hand it back to the operating system.
|
|
|
|
|
scavengelimit := int64(5 * 60 * 1e9)
|
|
|
|
|
|
|
|
|
|
if debug.scavenge > 0 {
|
|
|
|
|
// Scavenge-a-lot for testing.
|
|
|
|
|
forcegcperiod = 10 * 1e6
|
|
|
|
|
scavengelimit = 20 * 1e6
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lastscavenge := nanotime()
|
|
|
|
|
nscavenge := 0
|
|
|
|
|
|
|
|
|
|
// Make wake-up period small enough for the sampling to be correct.
|
|
|
|
|
maxsleep := forcegcperiod / 2
|
|
|
|
|
if scavengelimit < forcegcperiod {
|
|
|
|
|
maxsleep = scavengelimit / 2
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lasttrace := int64(0)
|
|
|
|
|
idle := 0 // how many cycles in succession we have not woken anybody up
|
|
|
|
|
delay := uint32(0)
|
|
|
|
|
for {
|
|
|
|
|
if idle == 0 { // start with 20us sleep...
|
|
|
|
|
delay = 20
|
|
|
|
|
} else if idle > 50 { // start doubling the sleep after 1ms...
|
|
|
|
|
delay *= 2
|
|
|
|
|
}
|
|
|
|
|
if delay > 10*1000 { // up to 10ms
|
|
|
|
|
delay = 10 * 1000
|
|
|
|
|
}
|
|
|
|
|
usleep(delay)
|
|
|
|
|
if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
if atomicload(&sched.gcwaiting) != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs) {
|
|
|
|
|
atomicstore(&sched.sysmonwait, 1)
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
notetsleep(&sched.sysmonnote, maxsleep)
|
|
|
|
|
lock(&sched.lock)
|
|
|
|
|
atomicstore(&sched.sysmonwait, 0)
|
|
|
|
|
noteclear(&sched.sysmonnote)
|
|
|
|
|
idle = 0
|
|
|
|
|
delay = 20
|
|
|
|
|
}
|
|
|
|
|
unlock(&sched.lock)
|
|
|
|
|
}
|
|
|
|
|
// poll network if not polled for more than 10ms
|
|
|
|
|
lastpoll := int64(atomicload64(&sched.lastpoll))
|
|
|
|
|
now := nanotime()
|
|
|
|
|
unixnow := unixnanotime()
|
|
|
|
|
if lastpoll != 0 && lastpoll+10*1000*1000 < now {
|
|
|
|
|
cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
|
|
|
|
|
gp := netpoll(false) // non-blocking - returns list of goroutines
|
|
|
|
|
if gp != nil {
|
|
|
|
|
// Need to decrement number of idle locked M's
|
|
|
|
|
// (pretending that one more is running) before injectglist.
|
|
|
|
|
// Otherwise it can lead to the following situation:
|
|
|
|
|
// injectglist grabs all P's but before it starts M's to run the P's,
|
|
|
|
|
// another M returns from syscall, finishes running its G,
|
|
|
|
|
// observes that there is no work to do and no other running M's
|
|
|
|
|
// and reports deadlock.
|
|
|
|
|
incidlelocked(-1)
|
|
|
|
|
injectglist(gp)
|
|
|
|
|
incidlelocked(1)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// retake P's blocked in syscalls
|
|
|
|
|
// and preempt long running G's
|
|
|
|
|
if retake(now) != 0 {
|
|
|
|
|
idle = 0
|
|
|
|
|
} else {
|
|
|
|
|
idle++
|
|
|
|
|
}
|
|
|
|
|
// check if we need to force a GC
|
|
|
|
|
lastgc := int64(atomicload64(&memstats.last_gc))
|
2015-07-08 14:18:33 -07:00
|
|
|
if lastgc != 0 && unixnow-lastgc > forcegcperiod && atomicload(&forcegc.idle) != 0 && atomicloaduint(&bggc.working) == 0 {
|
2014-11-11 17:08:33 -05:00
|
|
|
lock(&forcegc.lock)
|
|
|
|
|
forcegc.idle = 0
|
2015-04-17 00:21:30 -04:00
|
|
|
forcegc.g.schedlink = 0
|
2014-11-11 17:08:33 -05:00
|
|
|
injectglist(forcegc.g)
|
|
|
|
|
unlock(&forcegc.lock)
|
|
|
|
|
}
|
|
|
|
|
// scavenge heap once in a while
|
|
|
|
|
if lastscavenge+scavengelimit/2 < now {
|
|
|
|
|
mHeap_Scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit))
|
|
|
|
|
lastscavenge = now
|
|
|
|
|
nscavenge++
|
|
|
|
|
}
|
|
|
|
|
if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace*1000000) <= now {
|
|
|
|
|
lasttrace = now
|
|
|
|
|
schedtrace(debug.scheddetail > 0)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
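
// A worked example of the sleep back-off above (derived from the code, for
// illustration only): while retake keeps finding work, idle stays 0 and
// sysmon sleeps 20us per cycle. After 50 consecutive idle cycles (roughly
// 1ms of 20us sleeps) the delay doubles every cycle: 20us, 40us, 80us, ...
// until it is clamped at 10ms. A single cycle in which retake returns
// non-zero resets idle to 0, and the next iteration drops the delay back
// to 20us.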

var pdesc [_MaxGomaxprocs]struct {
	schedtick   uint32
	schedwhen   int64
	syscalltick uint32
	syscallwhen int64
}

// forcePreemptNS is the time slice given to a G before it is
// preempted.
const forcePreemptNS = 10 * 1000 * 1000 // 10ms

func retake(now int64) uint32 {
	n := 0
	for i := int32(0); i < gomaxprocs; i++ {
		_p_ := allp[i]
		if _p_ == nil {
			continue
		}
		pd := &pdesc[i]
		s := _p_.status
		if s == _Psyscall {
			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
			t := int64(_p_.syscalltick)
			if int64(pd.syscalltick) != t {
				pd.syscalltick = uint32(t)
				pd.syscallwhen = now
				continue
			}
			// On the one hand we don't want to retake Ps if there is no other work to do,
			// but on the other hand we want to retake them eventually
			// because they can prevent the sysmon thread from deep sleep.
			if runqempty(_p_) && atomicload(&sched.nmspinning)+atomicload(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
				continue
			}
			// Need to decrement number of idle locked M's
			// (pretending that one more is running) before the CAS.
			// Otherwise the M from which we retake can exit the syscall,
			// increment nmidle and report deadlock.
			incidlelocked(-1)
			if cas(&_p_.status, s, _Pidle) {
				if trace.enabled {
					traceGoSysBlock(_p_)
					traceProcStop(_p_)
				}
				n++
				_p_.syscalltick++
				handoffp(_p_)
			}
			incidlelocked(1)
		} else if s == _Prunning {
			// Preempt G if it's running for too long.
			t := int64(_p_.schedtick)
			if int64(pd.schedtick) != t {
				pd.schedtick = uint32(t)
				pd.schedwhen = now
				continue
			}
			if pd.schedwhen+forcePreemptNS > now {
				continue
			}
			preemptone(_p_)
		}
	}
	return uint32(n)
}
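
// Summary of the retake policy above (a reading of the code, not an added
// rule): a P stuck in _Psyscall for at least one full sysmon tick is retaken
// right away if it still has local work or there are no spinning or idle Ps
// to absorb work; otherwise it is left alone until it has been in the syscall
// for more than 10ms. A P in _Prunning whose schedtick has not advanced for
// forcePreemptNS (10ms) gets a preemption request via preemptone.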

// Tell all goroutines that they have been preempted and they should stop.
// This function is purely best-effort. It can fail to inform a goroutine if a
// processor just started running it.
// No locks need to be held.
// Returns true if preemption request was issued to at least one goroutine.
func preemptall() bool {
	res := false
	for i := int32(0); i < gomaxprocs; i++ {
		_p_ := allp[i]
		if _p_ == nil || _p_.status != _Prunning {
			continue
		}
		if preemptone(_p_) {
			res = true
		}
	}
	return res
}

// Tell the goroutine running on processor P to stop.
// This function is purely best-effort. It can incorrectly fail to inform the
// goroutine. It can inform the wrong goroutine. Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing newstack.
// No lock needs to be held.
// Returns true if preemption request was issued.
// The actual preemption will happen at some point in the future
// and will be indicated by the gp->status no longer being
// Grunning.
func preemptone(_p_ *p) bool {
	mp := _p_.m.ptr()
	if mp == nil || mp == getg().m {
		return false
	}
	gp := mp.curg
	if gp == nil || gp == mp.g0 {
		return false
	}

	gp.preempt = true

	// Every call in a goroutine checks for stack overflow by
	// comparing the current stack pointer to gp->stackguard0.
	// Setting gp->stackguard0 to StackPreempt folds
	// preemption into the normal stack overflow check.
	gp.stackguard0 = stackPreempt
	return true
}
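
// How the request set by preemptone is actually noticed (a summary under the
// usual Go toolchain behavior, not logic added here): the compiler emits a
// stack-bound check in most function prologues that compares SP against
// gp.stackguard0. With stackguard0 set to stackPreempt every such check
// fails, so the goroutine enters the stack-growth path, which sees
// gp.preempt == true and reschedules instead of growing the stack. A
// goroutine that makes no function calls never runs the check, which is why
// the comments above describe preemption as best-effort.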

var starttime int64

func schedtrace(detailed bool) {
	now := nanotime()
	if starttime == 0 {
		starttime = now
	}

	lock(&sched.lock)
	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
	if detailed {
		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
	}
	// We must be careful while reading data from P's, M's and G's.
	// Even if we hold schedlock, most data can be changed concurrently.
	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
	for i := int32(0); i < gomaxprocs; i++ {
		_p_ := allp[i]
		if _p_ == nil {
			continue
		}
		mp := _p_.m.ptr()
		h := atomicload(&_p_.runqhead)
		t := atomicload(&_p_.runqtail)
		if detailed {
			id := int32(-1)
			if mp != nil {
				id = mp.id
			}
			print(" P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n")
		} else {
			// In non-detailed mode format lengths of per-P run queues as:
			// [len1 len2 len3 len4]
			print(" ")
			if i == 0 {
				print("[")
			}
			print(t - h)
			if i == gomaxprocs-1 {
				print("]\n")
			}
		}
	}

	if !detailed {
		unlock(&sched.lock)
		return
	}

	for mp := allm; mp != nil; mp = mp.alllink {
		_p_ := mp.p.ptr()
		gp := mp.curg
		lockedg := mp.lockedg
		id1 := int32(-1)
		if _p_ != nil {
			id1 = _p_.id
		}
		id2 := int64(-1)
		if gp != nil {
			id2 = gp.goid
		}
		id3 := int64(-1)
		if lockedg != nil {
			id3 = lockedg.goid
		}
		print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", getg().m.blocked, " lockedg=", id3, "\n")
	}

	lock(&allglock)
	for gi := 0; gi < len(allgs); gi++ {
		gp := allgs[gi]
		mp := gp.m
		lockedm := gp.lockedm
		id1 := int32(-1)
		if mp != nil {
			id1 = mp.id
		}
		id2 := int32(-1)
		if lockedm != nil {
			id2 = lockedm.id
		}
		print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n")
	}
	unlock(&allglock)
	unlock(&sched.lock)
}

// Put mp on midle list.
// Sched must be locked.
// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func mput(mp *m) {
	mp.schedlink = sched.midle
	sched.midle.set(mp)
	sched.nmidle++
	checkdead()
}

// Try to get an m from midle list.
// Sched must be locked.
// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func mget() *m {
	mp := sched.midle.ptr()
	if mp != nil {
		sched.midle = mp.schedlink
		sched.nmidle--
	}
	return mp
}

// Put gp on the global runnable queue.
// Sched must be locked.
// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func globrunqput(gp *g) {
	gp.schedlink = 0
	if sched.runqtail != 0 {
		sched.runqtail.ptr().schedlink.set(gp)
	} else {
		sched.runqhead.set(gp)
	}
	sched.runqtail.set(gp)
	sched.runqsize++
}
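
// The global run queue is an intrusive singly linked list threaded through
// g.schedlink, with sched.runqhead/sched.runqtail as the ends. A short trace
// of globrunqput, for illustration only: on an empty queue both head and tail
// end up pointing at gp; a second call links the old tail's schedlink to the
// new g and advances the tail, leaving the head untouched.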

// Put gp at the head of the global runnable queue.
// Sched must be locked.
// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func globrunqputhead(gp *g) {
	gp.schedlink = sched.runqhead
	sched.runqhead.set(gp)
	if sched.runqtail == 0 {
		sched.runqtail.set(gp)
	}
	sched.runqsize++
}

// Put a batch of runnable goroutines on the global runnable queue.
// Sched must be locked.
func globrunqputbatch(ghead *g, gtail *g, n int32) {
	gtail.schedlink = 0
	if sched.runqtail != 0 {
		sched.runqtail.ptr().schedlink.set(ghead)
	} else {
		sched.runqhead.set(ghead)
	}
	sched.runqtail.set(gtail)
	sched.runqsize += n
}

// Try to get a batch of G's from the global runnable queue.
// Sched must be locked.
func globrunqget(_p_ *p, max int32) *g {
	if sched.runqsize == 0 {
		return nil
	}

	n := sched.runqsize/gomaxprocs + 1
	if n > sched.runqsize {
		n = sched.runqsize
	}
	if max > 0 && n > max {
		n = max
	}
	if n > int32(len(_p_.runq))/2 {
		n = int32(len(_p_.runq)) / 2
	}

	sched.runqsize -= n
	if sched.runqsize == 0 {
		sched.runqtail = 0
	}

	gp := sched.runqhead.ptr()
	sched.runqhead = gp.schedlink
	n--
	for ; n > 0; n-- {
		gp1 := sched.runqhead.ptr()
		sched.runqhead = gp1.schedlink
		runqput(_p_, gp1, false)
	}
	return gp
}
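
// Worked example of the batch-size computation above (illustrative numbers):
// with sched.runqsize = 20 and gomaxprocs = 4, n = 20/4 + 1 = 6, so each P
// visiting the global queue takes roughly its fair share plus one, which
// drains the queue even when runqsize is not a multiple of gomaxprocs. The
// result is then clamped to max and to half of the local run queue so that
// the transfer via runqput cannot overflow the caller's runq.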

// Put p on the _Pidle list.
// Sched must be locked.
// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func pidleput(_p_ *p) {
	if !runqempty(_p_) {
		throw("pidleput: P has non-empty run queue")
	}
	_p_.link = sched.pidle
	sched.pidle.set(_p_)
	xadd(&sched.npidle, 1) // TODO: fast atomic
}

// Try to get a p from the _Pidle list.
// Sched must be locked.
// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func pidleget() *p {
	_p_ := sched.pidle.ptr()
	if _p_ != nil {
		sched.pidle = _p_.link
		xadd(&sched.npidle, -1) // TODO: fast atomic
	}
	return _p_
}

// runqempty returns true if _p_ has no Gs on its local run queue.
// Note that this test is generally racy.
func runqempty(_p_ *p) bool {
	return _p_.runqhead == _p_.runqtail && _p_.runnext == 0
}

// To shake out latent assumptions about scheduling order,
// we introduce some randomness into scheduling decisions
// when running with the race detector.
// The need for this was made obvious by changing the
// (deterministic) scheduling order in Go 1.5 and breaking
// many poorly-written tests.
// With the randomness here, as long as the tests pass
// consistently with -race, they shouldn't have latent scheduling
// assumptions.
const randomizeScheduler = raceenabled

// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
// If next is true, runqput puts g in the _p_.runnext slot.
// If the run queue is full, runqput puts g on the global queue.
// Executed only by the owner P.
func runqput(_p_ *p, gp *g, next bool) {
	if randomizeScheduler && next && fastrand1()%2 == 0 {
		next = false
	}

	if next {
	retryNext:
		oldnext := _p_.runnext
		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
			goto retryNext
		}
		if oldnext == 0 {
			return
		}
		// Kick the old runnext out to the regular run queue.
		gp = oldnext.ptr()
	}

retry:
	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
	t := _p_.runqtail
	if t-h < uint32(len(_p_.runq)) {
		_p_.runq[t%uint32(len(_p_.runq))] = gp
		atomicstore(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
		return
	}
	if runqputslow(_p_, gp, h, t) {
		return
	}
	// the queue is not full, now the put above must succeed
	goto retry
}
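
// A note on the ring-buffer indexing above (for readers; no new behavior):
// runqhead and runqtail are free-running uint32 counters, so t-h is the
// current queue length and a new element lands in runq[t%len(runq)]. For
// example, with len(runq) == 256, h == 300 and t == 555 the queue holds 255
// goroutines; one more runqput fills it, and the put after that takes the
// runqputslow path, which moves half of the queue plus gp to the global
// queue.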

// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
	var batch [len(_p_.runq)/2 + 1]*g

	// First, grab a batch from local queue.
	n := t - h
	n = n / 2
	if n != uint32(len(_p_.runq)/2) {
		throw("runqputslow: queue is not full")
	}
	for i := uint32(0); i < n; i++ {
		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
	}
	if !cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
		return false
	}
	batch[n] = gp

	if randomizeScheduler {
		for i := uint32(1); i <= n; i++ {
			j := fastrand1() % (i + 1)
			batch[i], batch[j] = batch[j], batch[i]
		}
	}

	// Link the goroutines.
	for i := uint32(0); i < n; i++ {
		batch[i].schedlink.set(batch[i+1])
	}

	// Now put the batch on global queue.
	lock(&sched.lock)
	globrunqputbatch(batch[0], batch[n], int32(n+1))
	unlock(&sched.lock)
	return true
}
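
// The shuffle above is a standard Fisher-Yates pass over batch[0..n]. It runs
// only under the race detector (randomizeScheduler), so tests cannot come to
// depend on the order in which overflowed goroutines reach the global queue.
// Note that batch[n] is gp itself, so the incoming goroutine takes part in
// the shuffle as well.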

// Get g from local runnable queue.
// If inheritTime is true, gp should inherit the remaining time in the
// current time slice. Otherwise, it should start a new time slice.
// Executed only by the owner P.
func runqget(_p_ *p) (gp *g, inheritTime bool) {
	// If there's a runnext, it's the next G to run.
	for {
		next := _p_.runnext
		if next == 0 {
			break
		}
		if _p_.runnext.cas(next, 0) {
			return next.ptr(), true
		}
	}

	for {
		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
		t := _p_.runqtail
		if t == h {
			return nil, false
		}
		gp := _p_.runq[h%uint32(len(_p_.runq))]
		if cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
			return gp, false
		}
	}
}

// Grabs a batch of goroutines from _p_'s runnable queue into batch.
// Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
// Can be executed by any P.
func runqgrab(_p_ *p, batch *[256]*g, batchHead uint32, stealRunNextG bool) uint32 {
	for {
		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
		t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
		n := t - h
		n = n - n/2
		if n == 0 {
			if stealRunNextG {
				// Try to steal from _p_.runnext.
				if next := _p_.runnext; next != 0 {
					// Sleep to ensure that _p_ isn't about to run the g we
					// are about to steal.
					// The important use case here is when the g running on _p_
					// ready()s another g and then almost immediately blocks.
					// Instead of stealing runnext in this window, back off
					// to give _p_ a chance to schedule runnext. This will avoid
					// thrashing gs between different Ps.
					usleep(100)
					if !_p_.runnext.cas(next, 0) {
						continue
					}
					batch[batchHead%uint32(len(batch))] = next.ptr()
					return 1
				}
			}
			return 0
		}
		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
			continue
		}
		for i := uint32(0); i < n; i++ {
			g := _p_.runq[(h+i)%uint32(len(_p_.runq))]
			batch[(batchHead+i)%uint32(len(batch))] = g
		}
		if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
			return n
		}
	}
}
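
// The "steal half" arithmetic above rounds up: n starts as the victim queue
// length t-h, and n - n/2 is the ceiling of half of that. For example, a
// queue of length 5 yields n = 5 - 2 = 3, and a queue of length 1 yields
// n = 1, so even a lone goroutine in the ring buffer is taken. Only when the
// ring buffer is empty (n == 0) does the code fall back to stealing the
// runnext slot, using the usleep(100) back-off described in the comments.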

// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
func runqsteal(_p_, p2 *p, stealRunNextG bool) *g {
	t := _p_.runqtail
	n := runqgrab(p2, &_p_.runq, t, stealRunNextG)
	if n == 0 {
		return nil
	}
	n--
	gp := _p_.runq[(t+n)%uint32(len(_p_.runq))]
	if n == 0 {
		return gp
	}
	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
	if t-h+n >= uint32(len(_p_.runq)) {
		throw("runqsteal: runq overflow")
	}
	atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
	return gp
}
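
// Design note (a reading of the code above, not an added guarantee): runqgrab
// writes the stolen goroutines directly into the stealing P's runq starting
// at its current tail, so no intermediate buffer or extra copy is needed. The
// last grabbed element is handed back to the caller to run immediately, and
// only the remaining ones are published by advancing runqtail with a
// store-release.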
runtime: yield time slice to most recently readied G

Currently, when the runtime ready()s a G, it adds it to the end of the
current P's run queue and continues running. If there are many other
things in the run queue, this can result in a significant delay before
the ready()d G actually runs and can hurt fairness when other Gs in
the run queue are CPU hogs. For example, if there are three Gs sharing
a P, one of which is a CPU hog that never voluntarily gives up the P
and the other two of which are doing small amounts of work and
communicating back and forth on an unbuffered channel, the two
communicating Gs will get very little CPU time.

Change this so that when G1 ready()s G2 and then blocks, the scheduler
immediately hands off the remainder of G1's time slice to G2. In the
above example, the two communicating Gs will now act as a unit and
together get half of the CPU time, while the CPU hog gets the other
half of the CPU time.

This fixes the problem demonstrated by the ping-pong benchmark added
in the previous commit:

benchmark             old ns/op  new ns/op  delta
BenchmarkPingPongHog  684287     825        -99.88%

On the x/benchmarks suite, this change improves the performance of
garbage by ~6% (for GOMAXPROCS=1 and 4), and json by 28% and 36% for
GOMAXPROCS=1 and 4. It has negligible effect on heap size.

This has no effect on the go1 benchmark suite since those benchmarks
are mostly single-threaded.

Change-Id: I858a08eaa78f702ea98a5fac99d28a4ac91d339f
Reviewed-on: https://go-review.googlesource.com/9289
Reviewed-by: Rick Hudson <rlh@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-22 14:42:26 -04:00

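A sketch of the workload shape described above (this is not the BenchmarkPingPongHog source; the names and counts are made up): two goroutines bounce a value over an unbuffered channel while a third goroutine spins, all on one P.

package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	runtime.GOMAXPROCS(1)

	// CPU hog: never blocks, only loses the P when it is preempted.
	stop := make(chan struct{})
	go func() {
		for {
			select {
			case <-stop:
				return
			default:
			}
		}
	}()

	// Ping-pong pair: every send readies the peer and then blocks, so with
	// the change above the peer inherits the remainder of the time slice.
	ping, pong := make(chan int), make(chan int)
	go func() {
		for v := range ping {
			pong <- v + 1
		}
	}()

	start := time.Now()
	v := 0
	for i := 0; i < 10000; i++ {
		ping <- v
		v = <-pong
	}
	fmt.Println(v, "round trips in", time.Since(start))
	close(stop)
	close(ping)
}
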
func testSchedLocalQueue() {
	_p_ := new(p)
	gs := make([]g, len(_p_.runq))
	for i := 0; i < len(_p_.runq); i++ {
		if g, _ := runqget(_p_); g != nil {
			throw("runq is not empty initially")
		}
		for j := 0; j < i; j++ {
			runqput(_p_, &gs[i], false)
		}
		for j := 0; j < i; j++ {
			if g, _ := runqget(_p_); g != &gs[i] {
				print("bad element at iter ", i, "/", j, "\n")
				throw("bad element")
			}
		}
		if g, _ := runqget(_p_); g != nil {
			throw("runq is not empty afterwards")
		}
	}
}

func testSchedLocalQueueSteal() {
	p1 := new(p)
	p2 := new(p)
	gs := make([]g, len(p1.runq))
	for i := 0; i < len(p1.runq); i++ {
		for j := 0; j < i; j++ {
			gs[j].sig = 0
			runqput(p1, &gs[j], false)
		}
		gp := runqsteal(p2, p1, true)
		s := 0
		if gp != nil {
			s++
			gp.sig++
		}
		for {
			gp, _ = runqget(p2)
			if gp == nil {
				break
			}
			s++
			gp.sig++
		}
		for {
			gp, _ = runqget(p1)
			if gp == nil {
				break
			}
			gp.sig++
		}
		for j := 0; j < i; j++ {
			if gs[j].sig != 1 {
				print("bad element ", j, "(", gs[j].sig, ") at iter ", i, "\n")
				throw("bad element")
			}
		}
		// runqsteal takes about half of the i queued Gs: i - i/2 of them,
		// which is i/2 when i is even and i/2+1 when i is odd.
		if s != i/2 && s != i/2+1 {
			print("bad steal ", s, ", want ", i/2, " or ", i/2+1, ", iter ", i, "\n")
			throw("bad steal")
		}
	}
}

func setMaxThreads(in int) (out int) {
	lock(&sched.lock)
	out = int(sched.maxmcount)
	sched.maxmcount = int32(in)
	checkmcount()
	unlock(&sched.lock)
	return
}

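setMaxThreads is the runtime-side counterpart of the public runtime/debug.SetMaxThreads call. A minimal user-level example of that API (the 20000 limit here is arbitrary):

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// SetMaxThreads returns the previous limit (10000 by default) and
	// installs the new one; exceeding the limit crashes the program.
	prev := debug.SetMaxThreads(20000)
	fmt.Println("previous thread limit:", prev)
}
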
func haveexperiment(name string) bool {
	x := goexperiment
	for x != "" {
		xname := ""
		i := index(x, ",")
		if i < 0 {
			xname, x = x, ""
		} else {
			xname, x = x[:i], x[i+1:]
		}
		if xname == name {
			return true
		}
	}
	return false
}

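haveexperiment is a simple membership test over the comma-separated GOEXPERIMENT list baked in at build time. An equivalent stand-alone sketch, using strings.Split where the runtime avoids importing package strings (the experiment names below are only examples):

package main

import (
	"fmt"
	"strings"
)

// haveExperiment reports whether name appears in the comma-separated list,
// mirroring the loop above with strings.Split doing the splitting.
func haveExperiment(list, name string) bool {
	for _, x := range strings.Split(list, ",") {
		if x == name {
			return true
		}
	}
	return false
}

func main() {
	const goexperiment = "framepointer,fieldtrack"
	fmt.Println(haveExperiment(goexperiment, "framepointer")) // true
	fmt.Println(haveExperiment(goexperiment, "checkptr"))     // false
}
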
|
|
|
|
|
|
|
|
|
|
//go:nosplit
|
2014-12-22 13:27:53 -05:00
|
|
|
func procPin() int {
|
2014-11-11 17:08:33 -05:00
|
|
|
_g_ := getg()
|
|
|
|
|
mp := _g_.m
|
|
|
|
|
|
|
|
|
|
mp.locks++
|
2015-04-17 00:21:30 -04:00
|
|
|
return int(mp.p.ptr().id)
|
2014-11-11 17:08:33 -05:00
|
|
|
}
//go:nosplit
func procUnpin() {
	_g_ := getg()
	_g_.m.locks--
}

//go:linkname sync_runtime_procPin sync.runtime_procPin
//go:nosplit
func sync_runtime_procPin() int {
	return procPin()
}

//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
//go:nosplit
func sync_runtime_procUnpin() {
	procUnpin()
}

//go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin
//go:nosplit
func sync_atomic_runtime_procPin() int {
	return procPin()
}

//go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin
//go:nosplit
func sync_atomic_runtime_procUnpin() {
	procUnpin()
}

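These //go:linkname wrappers are how package sync and sync/atomic reach procPin and procUnpin without the runtime exporting them; sync.Pool is the best-known user, pinning to a P so it can work on a per-P local cache without taking a lock. A user-level example of the public API that sits on top (illustrative only):

package main

import (
	"bytes"
	"fmt"
	"sync"
)

// bufPool hands out *bytes.Buffer values. Pool keeps per-P caches, which is
// why its fast path pins the goroutine to a P instead of locking.
var bufPool = sync.Pool{
	New: func() interface{} { return new(bytes.Buffer) },
}

func main() {
	b := bufPool.Get().(*bytes.Buffer)
	b.WriteString("hello")
	fmt.Println(b.String())
	b.Reset()
	bufPool.Put(b)
}
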
// Active spinning for sync.Mutex.
//go:linkname sync_runtime_canSpin sync.runtime_canSpin
//go:nosplit
func sync_runtime_canSpin(i int) bool {
	// sync.Mutex is cooperative, so we are conservative with spinning.
	// Spin only a few times, and only if we are running on a multicore machine,
	// GOMAXPROCS > 1, there is at least one other running P, and the local runq is empty.
	// As opposed to runtime mutexes we don't do passive spinning here,
	// because there can be work on the global runq or on other Ps.
	if i >= active_spin || ncpu <= 1 || gomaxprocs <= int32(sched.npidle+sched.nmspinning)+1 {
		return false
	}
	if p := getg().m.p.ptr(); !runqempty(p) {
		return false
	}
	return true
}

//go:linkname sync_runtime_doSpin sync.runtime_doSpin
//go:nosplit
func sync_runtime_doSpin() {
	procyield(active_spin_cnt)
}

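canSpin and doSpin together let sync.Mutex spin briefly before parking a goroutine. The same spin-then-give-up shape can be written against the public API; the sketch below uses an atomic flag and runtime.Gosched as a stand-in for procyield, and is illustrative rather than how sync.Mutex is actually implemented.

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

const spinAttempts = 4 // like active_spin: give up quickly if the flag stays held

// tryAcquire spins a few times on an atomic flag before reporting failure,
// at which point a real lock would fall back to queueing and sleeping.
func tryAcquire(flag *uint32) bool {
	for i := 0; i < spinAttempts; i++ {
		if atomic.CompareAndSwapUint32(flag, 0, 1) {
			return true
		}
		runtime.Gosched() // stand-in for procyield: let someone else run
	}
	return false
}

func main() {
	var flag uint32
	fmt.Println(tryAcquire(&flag)) // true: flag was free
	fmt.Println(tryAcquire(&flag)) // false: still held, spinning gave up
}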