mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime,runtime/metrics: add metric to track scheduling latencies
This change adds a metric to track scheduling latencies, defined as the cumulative amount of time a goroutine spends being runnable before running again. The metric is an approximations and samples instead of trying to record every goroutine scheduling latency. This change was primarily authored by mknyszek@google.com. Change-Id: Ie0be7e6e7be421572eb2317d3dd8dd6f3d6aa152 Reviewed-on: https://go-review.googlesource.com/c/go/+/308933 Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
parent
105a6e9518
commit
bedfeed54a
7 changed files with 81 additions and 1 deletions
|
|
@ -81,6 +81,10 @@ type timeHistogram struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// record adds the given duration to the distribution.
|
// record adds the given duration to the distribution.
|
||||||
|
//
|
||||||
|
// Disallow preemptions and stack growths because this function
|
||||||
|
// may run in sensitive locations.
|
||||||
|
//go:nosplit
|
||||||
func (h *timeHistogram) record(duration int64) {
|
func (h *timeHistogram) record(duration int64) {
|
||||||
if duration < 0 {
|
if duration < 0 {
|
||||||
atomic.Xadd64(&h.underflow, 1)
|
atomic.Xadd64(&h.underflow, 1)
|
||||||
|
|
|
||||||
|
|
@ -245,6 +245,15 @@ func initMetrics() {
|
||||||
out.scalar = uint64(gcount())
|
out.scalar = uint64(gcount())
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
"/sched/latencies:seconds": {
|
||||||
|
compute: func(_ *statAggregate, out *metricValue) {
|
||||||
|
hist := out.float64HistOrInit(timeHistBuckets)
|
||||||
|
hist.counts[0] = atomic.Load64(&sched.timeToRun.underflow)
|
||||||
|
for i := range sched.timeToRun.counts {
|
||||||
|
hist.counts[i+1] = atomic.Load64(&sched.timeToRun.counts[i])
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
metricsInit = true
|
metricsInit = true
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -176,6 +176,11 @@ var allDesc = []Description{
|
||||||
Description: "Count of live goroutines.",
|
Description: "Count of live goroutines.",
|
||||||
Kind: KindUint64,
|
Kind: KindUint64,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: "/sched/latencies:seconds",
|
||||||
|
Description: "Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running.",
|
||||||
|
Kind: KindFloat64Histogram,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// All returns a slice of containing metric descriptions for all supported metrics.
|
// All returns a slice of containing metric descriptions for all supported metrics.
|
||||||
|
|
|
||||||
|
|
@ -139,5 +139,9 @@ Below is the full list of supported metrics, ordered lexicographically.
|
||||||
|
|
||||||
/sched/goroutines:goroutines
|
/sched/goroutines:goroutines
|
||||||
Count of live goroutines.
|
Count of live goroutines.
|
||||||
|
|
||||||
|
/sched/latencies:seconds
|
||||||
|
Distribution of the time goroutines have spent in the scheduler
|
||||||
|
in a runnable state before actually running.
|
||||||
*/
|
*/
|
||||||
package metrics
|
package metrics
|
||||||
|
|
|
||||||
|
|
@ -697,6 +697,11 @@ func schedinit() {
|
||||||
sigsave(&_g_.m.sigmask)
|
sigsave(&_g_.m.sigmask)
|
||||||
initSigmask = _g_.m.sigmask
|
initSigmask = _g_.m.sigmask
|
||||||
|
|
||||||
|
if offset := unsafe.Offsetof(sched.timeToRun); offset%8 != 0 {
|
||||||
|
println(offset)
|
||||||
|
throw("sched.timeToRun not aligned to 8 bytes")
|
||||||
|
}
|
||||||
|
|
||||||
goargs()
|
goargs()
|
||||||
goenvs()
|
goenvs()
|
||||||
parsedebugvars()
|
parsedebugvars()
|
||||||
|
|
@ -973,6 +978,37 @@ func casgstatus(gp *g, oldval, newval uint32) {
|
||||||
nextYield = nanotime() + yieldDelay/2
|
nextYield = nanotime() + yieldDelay/2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle tracking for scheduling latencies.
|
||||||
|
if oldval == _Grunning {
|
||||||
|
// Track every 8th time a goroutine transitions out of running.
|
||||||
|
if gp.trackingSeq%gTrackingPeriod == 0 {
|
||||||
|
gp.tracking = true
|
||||||
|
}
|
||||||
|
gp.trackingSeq++
|
||||||
|
}
|
||||||
|
if gp.tracking {
|
||||||
|
now := nanotime()
|
||||||
|
if oldval == _Grunnable {
|
||||||
|
// We transitioned out of runnable, so measure how much
|
||||||
|
// time we spent in this state and add it to
|
||||||
|
// runnableTime.
|
||||||
|
gp.runnableTime += now - gp.runnableStamp
|
||||||
|
gp.runnableStamp = 0
|
||||||
|
}
|
||||||
|
if newval == _Grunnable {
|
||||||
|
// We just transitioned into runnable, so record what
|
||||||
|
// time that happened.
|
||||||
|
gp.runnableStamp = now
|
||||||
|
} else if newval == _Grunning {
|
||||||
|
// We're transitioning into running, so turn off
|
||||||
|
// tracking and record how much time we spent in
|
||||||
|
// runnable.
|
||||||
|
gp.tracking = false
|
||||||
|
sched.timeToRun.record(gp.runnableTime)
|
||||||
|
gp.runnableTime = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
|
// casgstatus(gp, oldstatus, Gcopystack), assuming oldstatus is Gwaiting or Grunnable.
|
||||||
|
|
@ -4286,6 +4322,11 @@ func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerp
|
||||||
if isSystemGoroutine(newg, false) {
|
if isSystemGoroutine(newg, false) {
|
||||||
atomic.Xadd(&sched.ngsys, +1)
|
atomic.Xadd(&sched.ngsys, +1)
|
||||||
}
|
}
|
||||||
|
// Track initial transition?
|
||||||
|
newg.trackingSeq = uint8(fastrand())
|
||||||
|
if newg.trackingSeq%gTrackingPeriod == 0 {
|
||||||
|
newg.tracking = true
|
||||||
|
}
|
||||||
casgstatus(newg, _Gdead, _Grunnable)
|
casgstatus(newg, _Gdead, _Grunnable)
|
||||||
|
|
||||||
if _p_.goidcache == _p_.goidcacheend {
|
if _p_.goidcache == _p_.goidcacheend {
|
||||||
|
|
|
||||||
|
|
@ -462,6 +462,10 @@ type g struct {
|
||||||
|
|
||||||
raceignore int8 // ignore race detection events
|
raceignore int8 // ignore race detection events
|
||||||
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
|
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
|
||||||
|
tracking bool // whether we're tracking this G for sched latency statistics
|
||||||
|
trackingSeq uint8 // used to decide whether to track this G
|
||||||
|
runnableStamp int64 // timestamp of when the G last became runnable, only used when tracking
|
||||||
|
runnableTime int64 // the amount of time spent runnable, cleared when running, only used when tracking
|
||||||
sysexitticks int64 // cputicks when syscall has returned (for tracing)
|
sysexitticks int64 // cputicks when syscall has returned (for tracing)
|
||||||
traceseq uint64 // trace event sequencer
|
traceseq uint64 // trace event sequencer
|
||||||
tracelastp puintptr // last P emitted an event for this goroutine
|
tracelastp puintptr // last P emitted an event for this goroutine
|
||||||
|
|
@ -493,6 +497,10 @@ type g struct {
|
||||||
gcAssistBytes int64
|
gcAssistBytes int64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gTrackingPeriod is the number of transitions out of _Grunning between
|
||||||
|
// latency tracking runs.
|
||||||
|
const gTrackingPeriod = 8
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// tlsSlots is the number of pointer-sized slots reserved for TLS on some platforms,
|
// tlsSlots is the number of pointer-sized slots reserved for TLS on some platforms,
|
||||||
// like Windows.
|
// like Windows.
|
||||||
|
|
@ -824,6 +832,15 @@ type schedt struct {
|
||||||
// Acquire and hold this mutex to block sysmon from interacting
|
// Acquire and hold this mutex to block sysmon from interacting
|
||||||
// with the rest of the runtime.
|
// with the rest of the runtime.
|
||||||
sysmonlock mutex
|
sysmonlock mutex
|
||||||
|
|
||||||
|
_ uint32 // ensure timeToRun has 8-byte alignment
|
||||||
|
|
||||||
|
// timeToRun is a distribution of scheduling latencies, defined
|
||||||
|
// as the sum of time a G spends in the _Grunnable state before
|
||||||
|
// it transitions to _Grunning.
|
||||||
|
//
|
||||||
|
// timeToRun is protected by sched.lock.
|
||||||
|
timeToRun timeHistogram
|
||||||
}
|
}
|
||||||
|
|
||||||
// Values for the flags field of a sigTabT.
|
// Values for the flags field of a sigTabT.
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
|
||||||
_32bit uintptr // size on 32bit platforms
|
_32bit uintptr // size on 32bit platforms
|
||||||
_64bit uintptr // size on 64bit platforms
|
_64bit uintptr // size on 64bit platforms
|
||||||
}{
|
}{
|
||||||
{runtime.G{}, 216, 376}, // g, but exported for testing
|
{runtime.G{}, 236, 392}, // g, but exported for testing
|
||||||
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
|
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue