runtime/metrics: add the last GC cycle that had the limiter enabled

This metric exports the the last GC cycle index that the GC limiter was
enabled. This metric is useful for debugging and identifying the root
cause of OOMs, especially when SetMemoryLimit is in use.

For #48409.

Change-Id: Ic6383b19e88058366a74f6ede1683b8ffb30a69c
Reviewed-on: https://go-review.googlesource.com/c/go/+/403614
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Michael Anthony Knyszek 2022-05-13 15:14:54 +00:00 committed by Michael Knyszek
parent 9bd6e2776f
commit cfccb5cb7c
4 changed files with 29 additions and 2 deletions

View file

@ -165,6 +165,12 @@ func initMetrics() {
out.scalar = uint64(in.heapStats.tinyAllocCount) out.scalar = uint64(in.heapStats.tinyAllocCount)
}, },
}, },
"/gc/limiter/last-enabled:gc-cycle": {
compute: func(_ *statAggregate, out *metricValue) {
out.kind = metricKindUint64
out.scalar = uint64(gcCPULimiter.lastEnabledCycle.Load())
},
},
"/gc/pauses:seconds": { "/gc/pauses:seconds": {
compute: func(_ *statAggregate, out *metricValue) { compute: func(_ *statAggregate, out *metricValue) {
hist := out.float64HistOrInit(timeHistBuckets) hist := out.float64HistOrInit(timeHistBuckets)

View file

@ -140,6 +140,15 @@ var allDesc = []Description{
Kind: KindUint64, Kind: KindUint64,
Cumulative: true, Cumulative: true,
}, },
{
Name: "/gc/limiter/last-enabled:gc-cycle",
Description: "GC cycle the last time the GC CPU limiter was enabled. " +
"This metric is useful for diagnosing the root cause of an out-of-memory " +
"error, because the limiter trades memory for CPU time when the GC's CPU " +
"time gets too high. This is most likely to occur with use of SetMemoryLimit. " +
"The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled.",
Kind: KindUint64,
},
{ {
Name: "/gc/pauses:seconds", Name: "/gc/pauses:seconds",
Description: "Distribution individual GC-related stop-the-world pause latencies.", Description: "Distribution individual GC-related stop-the-world pause latencies.",

View file

@ -102,6 +102,13 @@ Below is the full list of supported metrics, ordered lexicographically.
only their block. Each block is already accounted for in only their block. Each block is already accounted for in
allocs-by-size and frees-by-size. allocs-by-size and frees-by-size.
/gc/limiter/last-enabled:gc-cycle
GC cycle the last time the GC CPU limiter was enabled.
This metric is useful for diagnosing the root cause of an out-of-memory
error, because the limiter trades memory for CPU time when the GC's CPU
time gets too high. This is most likely to occur with use of SetMemoryLimit.
The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled.
/gc/pauses:seconds /gc/pauses:seconds
Distribution individual GC-related stop-the-world pause latencies. Distribution individual GC-related stop-the-world pause latencies.

View file

@ -40,8 +40,8 @@ type gcCPULimiterState struct {
// - fill <= capacity // - fill <= capacity
fill, capacity uint64 fill, capacity uint64
} }
// TODO(mknyszek): Export this as a runtime/metric to provide an estimate of // overflow is the cumulative amount of GC CPU time that we tried to fill the
// how much GC work is being dropped on the floor. // bucket with but exceeded its capacity.
overflow uint64 overflow uint64
// gcEnabled is an internal copy of gcBlackenEnabled that determines // gcEnabled is an internal copy of gcBlackenEnabled that determines
@ -65,6 +65,9 @@ type gcCPULimiterState struct {
// Updated under lock, but may be read concurrently. // Updated under lock, but may be read concurrently.
lastUpdate atomic.Int64 lastUpdate atomic.Int64
// lastEnabledCycle is the GC cycle that last had the limiter enabled.
lastEnabledCycle atomic.Uint32
// nprocs is an internal copy of gomaxprocs, used to determine total available // nprocs is an internal copy of gomaxprocs, used to determine total available
// CPU time. // CPU time.
// //
@ -203,6 +206,7 @@ func (l *gcCPULimiterState) accumulate(mutatorTime, gcTime int64) {
l.bucket.fill = l.bucket.capacity l.bucket.fill = l.bucket.capacity
if !enabled { if !enabled {
l.enabled.Store(true) l.enabled.Store(true)
l.lastEnabledCycle.Store(memstats.numgc + 1)
} }
return return
} }
@ -254,6 +258,7 @@ func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
if l.bucket.fill > l.bucket.capacity { if l.bucket.fill > l.bucket.capacity {
l.bucket.fill = l.bucket.capacity l.bucket.fill = l.bucket.capacity
l.enabled.Store(true) l.enabled.Store(true)
l.lastEnabledCycle.Store(memstats.numgc + 1)
} else if l.bucket.fill < l.bucket.capacity { } else if l.bucket.fill < l.bucket.capacity {
l.enabled.Store(false) l.enabled.Store(false)
} }