runtime: only use CPU time from the current window in the GC CPU limiter

Currently the GC CPU limiter consumes CPU time from a few pools, but
because the events that flush to those pools may overlap, rather than be
strictly contained within, the update window for the GC CPU limiter, the
limiter's accounting is ultimately sloppy.

This sloppiness complicates accounting for idle time more completely,
and makes reasoning about the transient behavior of the GC CPU limiter
much more difficult.

To remedy this, this CL adds a field to the P struct that tracks the
start time of any in-flight event the limiter might care about, along
with information about the nature of that event. This timestamp is
managed atomically so that the GC CPU limiter can come in and perform a
read of the partial CPU time consumed by a given event. The limiter also
updates the timestamp so that only what's left over is flushed by the
event itself when it completes.

The end result of this change is that, since the GC CPU limiter is aware
of all past completed events, and all in-flight events, it can much more
accurately collect the CPU time of events since the last update. There's
still the possibility for skew, but any leftover time will be captured
in the following update, and the magnitude of this leftover time is
effectively bounded by the update period of the GC CPU limiter, which is
much easier to consider.

One caveat of managing this timestamp-type combo atomically is that they
need to be packed in 64 bits. So, this CL gives up the top 3 bits of the
timestamp and places the type information there. What this means is we
effectively have only a 61-bit resolution timestamp. This is fine when
the top 3 bits are the same between calls to nanotime, but becomes a
problem on boundaries when those 3 bits change. These cases may cause
hiccups in the GC CPU limiter by not accounting for some source of CPU
time correctly, but with 61 bits of resolution this should be extremely
rare. The rate of update is on the order of milliseconds, so at worst
the runtime will be off of any given measurement by only a few
CPU-milliseconds (and this is directly bounded by the rate of update).
We're probably more inaccurate from the fact that we don't measure real
CPU time but only approximate it.

For #52890.

Change-Id: I347f30ac9e2ba6061806c21dfe0193ef2ab3bbe9
Reviewed-on: https://go-review.googlesource.com/c/go/+/410120
Reviewed-by: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Michael Anthony Knyszek 2022-06-02 19:06:27 +00:00 committed by Michael Knyszek
parent 162b88265e
commit d7941030c9
6 changed files with 203 additions and 7 deletions

View file

@ -1415,6 +1415,7 @@ func NewGCCPULimiter(now int64, gomaxprocs int32) *GCCPULimiter {
// on a 32-bit architecture, it may get allocated unaligned // on a 32-bit architecture, it may get allocated unaligned
// space. // space.
l := Escape(new(GCCPULimiter)) l := Escape(new(GCCPULimiter))
l.limiter.test = true
l.limiter.resetCapacity(now, gomaxprocs) l.limiter.resetCapacity(now, gomaxprocs)
return l return l
} }

View file

@ -1281,6 +1281,10 @@ func gcBgMarkWorker() {
startTime := nanotime() startTime := nanotime()
pp.gcMarkWorkerStartTime = startTime pp.gcMarkWorkerStartTime = startTime
var trackLimiterEvent bool
if pp.gcMarkWorkerMode == gcMarkWorkerIdleMode {
trackLimiterEvent = pp.limiterEvent.start(limiterEventIdleMarkWork, startTime)
}
decnwait := atomic.Xadd(&work.nwait, -1) decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc { if decnwait == work.nproc {
@ -1329,9 +1333,8 @@ func gcBgMarkWorker() {
now := nanotime() now := nanotime()
duration := now - startTime duration := now - startTime
gcController.markWorkerStop(pp.gcMarkWorkerMode, duration) gcController.markWorkerStop(pp.gcMarkWorkerMode, duration)
if pp.gcMarkWorkerMode == gcMarkWorkerIdleMode { if trackLimiterEvent {
gcCPULimiter.addIdleMarkTime(duration) pp.limiterEvent.stop(limiterEventIdleMarkWork, now)
gcCPULimiter.update(now)
} }
if pp.gcMarkWorkerMode == gcMarkWorkerFractionalMode { if pp.gcMarkWorkerMode == gcMarkWorkerFractionalMode {
atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)

View file

@ -76,6 +76,9 @@ type gcCPULimiterState struct {
// //
// gomaxprocs isn't used directly so as to keep this structure unit-testable. // gomaxprocs isn't used directly so as to keep this structure unit-testable.
nprocs int32 nprocs int32
// test indicates whether this instance of the struct was made for testing purposes.
test bool
} }
// limiting returns true if the CPU limiter is currently enabled, meaning the Go GC // limiting returns true if the CPU limiter is currently enabled, meaning the Go GC
@ -192,6 +195,33 @@ func (l *gcCPULimiterState) updateLocked(now int64) {
l.idleMarkTimePool.Add(-idleMarkTime) l.idleMarkTimePool.Add(-idleMarkTime)
} }
if !l.test {
// Consume time from in-flight events. Make sure we're not preemptible so allp can't change.
//
// The reason we do this instead of just waiting for those events to finish and push updates
// is to ensure that all the time we're accounting for happened sometime between lastUpdate
// and now. This dramatically simplifies reasoning about the limiter because we're not at
// risk of extra time being accounted for in this window than actually happened in this window,
// leading to all sorts of weird transient behavior.
mp := acquirem()
for _, pp := range allp {
typ, duration := pp.limiterEvent.consume(now)
switch typ {
case limiterEventIdleMarkWork:
idleMarkTime += duration
case limiterEventMarkAssist:
fallthrough
case limiterEventScavengeAssist:
assistTime += duration
case limiterEventNone:
break
default:
throw("invalid limiter event type found")
}
}
releasem(mp)
}
// Compute total GC time. // Compute total GC time.
windowGCTime := assistTime windowGCTime := assistTime
if l.gcEnabled { if l.gcEnabled {
@ -300,3 +330,144 @@ func (l *gcCPULimiterState) resetCapacity(now int64, nprocs int32) {
} }
l.unlock() l.unlock()
} }
// limiterEventType indicates the type of an event occuring on some P.
//
// These events represent the full set of events that the GC CPU limiter tracks
// to execute its function.
//
// This type may use no more than limiterEventBits bits of information.
type limiterEventType uint8
const (
limiterEventNone limiterEventType = iota // None of the following events.
limiterEventIdleMarkWork // Refers to an idle mark worker (see gcMarkWorkerMode).
limiterEventMarkAssist // Refers to mark assist (see gcAssistAlloc).
limiterEventScavengeAssist // Refers to a scavenge assist (see allocSpan).
limiterEventBits = 3
)
// limiterEventTypeMask is a mask for the bits in p.limiterEventStart that represent
// the event type. The rest of the bits of that field represent a timestamp.
const (
limiterEventTypeMask = uint64((1<<limiterEventBits)-1) << (64 - limiterEventBits)
limiterEventStampNone = limiterEventStamp(0)
)
// limiterEventStamp is a nanotime timestamp packed with a limiterEventType.
type limiterEventStamp uint64
// makeLimiterEventStamp creates a new stamp from the event type and the current timestamp.
func makeLimiterEventStamp(typ limiterEventType, now int64) limiterEventStamp {
return limiterEventStamp(uint64(typ)<<(64-limiterEventBits) | (uint64(now) &^ limiterEventTypeMask))
}
// duration computes the difference between now and the start time stored in the stamp.
//
// Returns 0 if the difference is negative, which may happen if now is stale or if the
// before and after timestamps cross a 2^(64-limiterEventBits) boundary.
func (s limiterEventStamp) duration(now int64) int64 {
// The top limiterEventBits bits of the timestamp are derived from the current time
// when computing a duration.
start := int64((uint64(now) & limiterEventTypeMask) | (uint64(s) &^ limiterEventTypeMask))
if now < start {
return 0
}
return now - start
}
// type extracts the event type from the stamp.
func (s limiterEventStamp) typ() limiterEventType {
return limiterEventType(s >> (64 - limiterEventBits))
}
// limiterEvent represents tracking state for an event tracked by the GC CPU limiter.
type limiterEvent struct {
stamp atomic.Uint64 // Stores a limiterEventStamp.
}
// start begins tracking a new limiter event of the current type. If an event
// is already in flight, then a new event cannot begin because the current time is
// already being attributed to that event. In this case, this function returns false.
// Otherwise, it returns true.
//
// The caller must be non-preemptible until at least stop is called or this function
// returns false. Because this is trying to measure "on-CPU" time of some event, getting
// scheduled away during it can mean that whatever we're measuring isn't a reflection
// of "on-CPU" time. The OS could deschedule us at any time, but we want to maintain as
// close of an approximation as we can.
func (e *limiterEvent) start(typ limiterEventType, now int64) bool {
if limiterEventStamp(e.stamp.Load()).typ() != limiterEventNone {
return false
}
e.stamp.Store(uint64(makeLimiterEventStamp(typ, now)))
return true
}
// consume acquires the partial event CPU time from any in-flight event.
// It achieves this by storing the current time as the new event time.
//
// Returns the type of the in-flight event, as well as how long it's currently been
// executing for. Returns limiterEventNone if no event is active.
func (e *limiterEvent) consume(now int64) (typ limiterEventType, duration int64) {
// Read the limiter event timestamp and update it to now.
for {
old := limiterEventStamp(e.stamp.Load())
typ = old.typ()
if typ == limiterEventNone {
// There's no in-flight event, so just push that up.
return
}
duration = old.duration(now)
if duration == 0 {
// We might have a stale now value, or this crossed the
// 2^(64-limiterEventBits) boundary in the clock readings.
// Just ignore it.
return limiterEventNone, 0
}
new := makeLimiterEventStamp(typ, now)
if e.stamp.CompareAndSwap(uint64(old), uint64(new)) {
break
}
}
return
}
// stop stops the active limiter event. Throws if the
//
// The caller must be non-preemptible across the event. See start as to why.
func (e *limiterEvent) stop(typ limiterEventType, now int64) {
var stamp limiterEventStamp
for {
stamp = limiterEventStamp(e.stamp.Load())
if stamp.typ() != typ {
print("runtime: want=", typ, " got=", stamp.typ(), "\n")
throw("limiterEvent.stop: found wrong event in p's limiter event slot")
}
if e.stamp.CompareAndSwap(uint64(stamp), uint64(limiterEventStampNone)) {
break
}
}
duration := stamp.duration(now)
if duration == 0 {
// It's possible that we're missing time because we crossed a
// 2^(64-limiterEventBits) boundary between the start and end.
// In this case, we're dropping that information. This is OK because
// at worst it'll cause a transient hiccup that will quickly resolve
// itself as all new timestamps begin on the other side of the boundary.
// Such a hiccup should be incredibly rare.
return
}
// Account for the event.
switch typ {
case limiterEventIdleMarkWork:
gcCPULimiter.addIdleMarkTime(duration)
case limiterEventMarkAssist:
fallthrough
case limiterEventScavengeAssist:
gcCPULimiter.addAssistTime(duration)
default:
throw("limiterEvent.stop: invalid limiter event type found")
}
}

View file

@ -545,7 +545,11 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
// Track time spent in this assist. Since we're on the // Track time spent in this assist. Since we're on the
// system stack, this is non-preemptible, so we can // system stack, this is non-preemptible, so we can
// just measure start and end time. // just measure start and end time.
//
// Limiter event tracking might be disabled if we end up here
// while on a mark worker.
startTime := nanotime() startTime := nanotime()
trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
decnwait := atomic.Xadd(&work.nwait, -1) decnwait := atomic.Xadd(&work.nwait, -1)
if decnwait == work.nproc { if decnwait == work.nproc {
@ -593,9 +597,11 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
duration := now - startTime duration := now - startTime
_p_ := gp.m.p.ptr() _p_ := gp.m.p.ptr()
_p_.gcAssistTime += duration _p_.gcAssistTime += duration
if trackLimiterEvent {
_p_.limiterEvent.stop(limiterEventMarkAssist, now)
}
if _p_.gcAssistTime > gcAssistTimeSlack { if _p_.gcAssistTime > gcAssistTimeSlack {
gcController.assistTime.Add(_p_.gcAssistTime) gcController.assistTime.Add(_p_.gcAssistTime)
gcCPULimiter.addAssistTime(_p_.gcAssistTime)
gcCPULimiter.update(now) gcCPULimiter.update(now)
_p_.gcAssistTime = 0 _p_.gcAssistTime = 0
} }

View file

@ -1301,17 +1301,29 @@ HaveSpan:
} }
} }
} }
if bytesToScavenge > 0 { // There are a few very limited cirumstances where we won't have a P here.
// It's OK to simply skip scavenging in these cases. Something else will notice
// and pick up the tab.
if pp != nil && bytesToScavenge > 0 {
// Measure how long we spent scavenging and add that measurement to the assist // Measure how long we spent scavenging and add that measurement to the assist
// time so we can track it for the GC CPU limiter. // time so we can track it for the GC CPU limiter.
//
// Limiter event tracking might be disabled if we end up here
// while on a mark worker.
start := nanotime() start := nanotime()
track := pp.limiterEvent.start(limiterEventScavengeAssist, start)
// Scavenge, but back out if the limiter turns on.
h.pages.scavenge(bytesToScavenge, func() bool { h.pages.scavenge(bytesToScavenge, func() bool {
return gcCPULimiter.limiting() return gcCPULimiter.limiting()
}) })
// Finish up accounting.
now := nanotime() now := nanotime()
if track {
pp.limiterEvent.stop(limiterEventScavengeAssist, now)
}
h.pages.scav.assistTime.Add(now - start) h.pages.scav.assistTime.Add(now - start)
gcCPULimiter.addAssistTime(now - start)
gcCPULimiter.update(now)
} }
// Commit and account for any scavenged memory that the span now owns. // Commit and account for any scavenged memory that the span now owns.

View file

@ -686,6 +686,9 @@ type p struct {
gcAssistTime int64 // Nanoseconds in assistAlloc gcAssistTime int64 // Nanoseconds in assistAlloc
gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic) gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic)
// limiterEvent tracks events for the GC CPU limiter.
limiterEvent limiterEvent
// gcMarkWorkerMode is the mode for the next mark worker to run in. // gcMarkWorkerMode is the mode for the next mark worker to run in.
// That is, this is used to communicate with the worker goroutine // That is, this is used to communicate with the worker goroutine
// selected for immediate execution by // selected for immediate execution by