mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
runtime: track the amount of scannable allocated stack for the GC pacer
This change adds two fields to gcControllerState: stackScan, used for pacing decisions, and scannableStackSize, which directly tracks the amount of space allocated for inuse stacks that will be scanned. scannableStackSize is not updated directly, but is instead flushed from each P when at an least 8 KiB delta has accumulated. This helps reduce issues with atomics contention for newly created goroutines. Stack growth paths are largely unaffected. StackGrowth-48 51.4ns ± 0% 51.4ns ± 0% ~ (p=0.927 n=10+10) StackGrowthDeep-48 6.14µs ± 3% 6.25µs ± 4% ~ (p=0.090 n=10+9) CreateGoroutines-48 273ns ± 1% 273ns ± 1% ~ (p=0.676 n=9+10) CreateGoroutinesParallel-48 65.5ns ± 5% 66.6ns ± 7% ~ (p=0.340 n=9+9) CreateGoroutinesCapture-48 2.06µs ± 1% 2.07µs ± 4% ~ (p=0.217 n=10+10) CreateGoroutinesSingle-48 550ns ± 3% 563ns ± 4% +2.41% (p=0.034 n=8+10) For #44167. Change-Id: Id1800d41d3a6c211b43aeb5681c57c0dc8880daf Reviewed-on: https://go-review.googlesource.com/c/go/+/309589 Trust: Michael Knyszek <mknyszek@google.com> Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
parent
8e112a7c2a
commit
9ac1ee2d46
4 changed files with 45 additions and 2 deletions
|
|
@ -47,6 +47,10 @@ const (
|
|||
|
||||
// defaultHeapMinimum is the value of heapMinimum for GOGC==100.
|
||||
defaultHeapMinimum = 4 << 20
|
||||
|
||||
// scannableStackSizeSlack is the bytes of stack space allocated or freed
|
||||
// that can accumulate on a P before updating gcController.stackSize.
|
||||
scannableStackSizeSlack = 8 << 10
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
|
@ -166,6 +170,18 @@ type gcControllerState struct {
|
|||
// Read and written atomically or with the world stopped.
|
||||
heapScan uint64
|
||||
|
||||
// stackScan is a snapshot of scannableStackSize taken at each GC
|
||||
// STW pause and is used in pacing decisions.
|
||||
//
|
||||
// Updated only while the world is stopped.
|
||||
stackScan uint64
|
||||
|
||||
// scannableStackSize is the amount of allocated goroutine stack space in
|
||||
// use by goroutines.
|
||||
//
|
||||
// Read and updated atomically.
|
||||
scannableStackSize uint64
|
||||
|
||||
// heapMarked is the number of bytes marked by the previous
|
||||
// GC. After mark termination, heapLive == heapMarked, but
|
||||
// unlike heapLive, heapMarked does not change until the
|
||||
|
|
@ -276,6 +292,7 @@ func (c *gcControllerState) startCycle(markStartTime int64) {
|
|||
c.fractionalMarkTime = 0
|
||||
c.idleMarkTime = 0
|
||||
c.markStartTime = markStartTime
|
||||
c.stackScan = atomic.Load64(&c.scannableStackSize)
|
||||
|
||||
// Ensure that the heap goal is at least a little larger than
|
||||
// the current live heap size. This may not be the case if GC
|
||||
|
|
@ -686,6 +703,18 @@ func (c *gcControllerState) update(dHeapLive, dHeapScan int64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *gcControllerState) addScannableStack(pp *p, amount int64) {
|
||||
if pp == nil {
|
||||
atomic.Xadd64(&c.scannableStackSize, amount)
|
||||
return
|
||||
}
|
||||
pp.scannableStackSizeDelta += amount
|
||||
if pp.scannableStackSizeDelta >= scannableStackSizeSlack || pp.scannableStackSizeDelta <= -scannableStackSizeSlack {
|
||||
atomic.Xadd64(&c.scannableStackSize, pp.scannableStackSizeDelta)
|
||||
pp.scannableStackSizeDelta = 0
|
||||
}
|
||||
}
|
||||
|
||||
// commit sets the trigger ratio and updates everything
|
||||
// derived from it: the absolute trigger, the heap goal, mark pacing,
|
||||
// and sweep pacing.
|
||||
|
|
|
|||
|
|
@ -3623,8 +3623,10 @@ func goexit1() {
|
|||
// goexit continuation on g0.
|
||||
func goexit0(gp *g) {
|
||||
_g_ := getg()
|
||||
_p_ := _g_.m.p.ptr()
|
||||
|
||||
casgstatus(gp, _Grunning, _Gdead)
|
||||
gcController.addScannableStack(_p_, -int64(gp.stack.hi-gp.stack.lo))
|
||||
if isSystemGoroutine(gp, false) {
|
||||
atomic.Xadd(&sched.ngsys, -1)
|
||||
}
|
||||
|
|
@ -3655,7 +3657,7 @@ func goexit0(gp *g) {
|
|||
dropg()
|
||||
|
||||
if GOARCH == "wasm" { // no threads yet on wasm
|
||||
gfput(_g_.m.p.ptr(), gp)
|
||||
gfput(_p_, gp)
|
||||
schedule() // never returns
|
||||
}
|
||||
|
||||
|
|
@ -3663,7 +3665,7 @@ func goexit0(gp *g) {
|
|||
print("invalid m->lockedInt = ", _g_.m.lockedInt, "\n")
|
||||
throw("internal lockOSThread error")
|
||||
}
|
||||
gfput(_g_.m.p.ptr(), gp)
|
||||
gfput(_p_, gp)
|
||||
if locked {
|
||||
// The goroutine may have locked this thread because
|
||||
// it put it in an unusual kernel state. Kill it
|
||||
|
|
@ -4292,6 +4294,7 @@ func newproc1(fn *funcval, callergp *g, callerpc uintptr) *g {
|
|||
newg.tracking = true
|
||||
}
|
||||
casgstatus(newg, _Gdead, _Grunnable)
|
||||
gcController.addScannableStack(_p_, int64(newg.stack.hi-newg.stack.lo))
|
||||
|
||||
if _p_.goidcache == _p_.goidcacheend {
|
||||
// Sched.goidgen is the last allocated id,
|
||||
|
|
|
|||
|
|
@ -734,6 +734,12 @@ type p struct {
|
|||
// Race context used while executing timer functions.
|
||||
timerRaceCtx uintptr
|
||||
|
||||
// scannableStackSizeDelta accumulates the amount of stack space held by
|
||||
// live goroutines (i.e. those eligible for stack scanning).
|
||||
// Flushed to gcController.scannableStackSize once scannableStackSizeSlack
|
||||
// or -scannableStackSizeSlack is reached.
|
||||
scannableStackSizeDelta int64
|
||||
|
||||
// preempt is set to indicate that this P should be enter the
|
||||
// scheduler ASAP (regardless of what G is running on it).
|
||||
preempt bool
|
||||
|
|
|
|||
|
|
@ -852,6 +852,11 @@ func copystack(gp *g, newsize uintptr) {
|
|||
throw("nil stackbase")
|
||||
}
|
||||
used := old.hi - gp.sched.sp
|
||||
// Add just the difference to gcController.addScannableStack.
|
||||
// g0 stacks never move, so this will never account for them.
|
||||
// It's also fine if we have no P, addScannableStack can deal with
|
||||
// that case.
|
||||
gcController.addScannableStack(getg().m.p.ptr(), int64(newsize)-int64(old.hi-old.lo))
|
||||
|
||||
// allocate new stack
|
||||
new := stackalloc(uint32(newsize))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue