runtime: flush mcaches lazily

Currently, all mcaches are flushed during STW mark termination as a
root marking job. This is currently necessary because all spans must
be out of these caches before sweeping begins to avoid races with
allocation and to ensure the spans are in the state expected by
sweeping. We do it as a root marking job because mcache flushing is
somewhat expensive and O(GOMAXPROCS) and this parallelizes the work
across the Ps. However, it's also the last remaining root marking job
performed during mark termination.

This CL moves mcache flushing out of mark termination and performs it
lazily. We keep track of the last sweepgen at which each mcache was
flushed, and as each P is woken from STW, it observes that its mcache
is out-of-date and flushes it.
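
Conceptually, the lazy flush is just a generation check on each P's
mcache. The following sketch is illustrative only (the helper name
maybeFlush is hypothetical; it mirrors the prepareForSweep function in
the diff below, with details such as stack cache clearing omitted):

    // If this mcache was last flushed in an older sweep generation,
    // its cached spans are stale: return them to their mcentrals so
    // they can be swept, then record the current generation.
    func (c *mcache) maybeFlush(sg uint32) {
        if c.flushGen == sg {
            return // already flushed for this sweep phase
        }
        c.releaseAll()  // return cached spans for sweeping
        c.flushGen = sg // mcache is now up to date
    }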

This introduces a complication for spans cached in stale mcaches. These
may now be observed by background or proportional sweeping or when
attempting to add a finalizer, but aren't in a stable state. For
example, they are likely to be on the wrong mcentral list. To fix
this, this CL extends the sweepgen protocol to also capture whether a
span is cached and, if so, whether or not its cache is stale. This
protocol blocks asynchronous sweeping from touching cached spans and
makes it the responsibility of mcache flushing to sweep the flushed
spans.
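
For reference, the extended sweepgen encoding works roughly as follows
(paraphrasing the mspan.sweepgen documentation, where h is the mheap;
the two "cached" states are the ones added by this change):

    // sweepgen is incremented by 2 after every GC.
    // s.sweepgen == h.sweepgen - 2: the span needs sweeping
    // s.sweepgen == h.sweepgen - 1: the span is currently being swept
    // s.sweepgen == h.sweepgen:     the span is swept and ready to use
    // s.sweepgen == h.sweepgen + 1: the span was cached before sweep
    //                               began and is still cached; it must
    //                               be swept by whoever flushes the cache
    // s.sweepgen == h.sweepgen + 3: the span was swept and then cached
    //                               and is still cached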

This eliminates the last mark termination root marking job, which
means we can now eliminate that entire infrastructure.

Updates #26903. This implements lazy mcache flushing.

Change-Id: Iadda7aabe540b2026cffc5195da7be37d5b4125e
Reviewed-on: https://go-review.googlesource.com/c/134783
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Austin Clements 2018-08-23 13:14:19 -04:00
parent 457c8f4fe9
commit 873bd47dfb
7 changed files with 119 additions and 29 deletions

src/runtime/mcache.go

@@ -4,7 +4,10 @@
package runtime
import "unsafe"
import (
"runtime/internal/atomic"
"unsafe"
)
// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
@@ -42,6 +45,12 @@ type mcache struct {
local_largefree uintptr // bytes freed for large objects (>maxsmallsize)
local_nlargefree uintptr // number of frees for large objects (>maxsmallsize)
local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
// flushGen indicates the sweepgen during which this mcache
// was last flushed. If flushGen != mheap_.sweepgen, the spans
// in this mcache are stale and need to be flushed so they
// can be swept. This is done in acquirep.
flushGen uint32
}
// A gclink is a node in a linked list of blocks, like mlink,
@@ -76,6 +85,7 @@ var emptymspan mspan
func allocmcache() *mcache {
lock(&mheap_.lock)
c := (*mcache)(mheap_.cachealloc.alloc())
c.flushGen = mheap_.sweepgen
unlock(&mheap_.lock)
for i := range c.alloc {
c.alloc[i] = &emptymspan
@@ -113,9 +123,12 @@ func (c *mcache) refill(spc spanClass) {
if uintptr(s.allocCount) != s.nelems {
throw("refill of span with free space remaining")
}
if s != &emptymspan {
s.incache = false
// Mark this span as no longer cached.
if s.sweepgen != mheap_.sweepgen+3 {
throw("bad sweepgen in refill")
}
atomic.Store(&s.sweepgen, mheap_.sweepgen)
}
// Get a new cached span from the central lists.
@@ -128,6 +141,10 @@ func (c *mcache) refill(spc spanClass) {
throw("span has no free space")
}
// Indicate that this span is cached and prevent asynchronous
// sweeping in the next sweep phase.
s.sweepgen = mheap_.sweepgen + 3
c.alloc[spc] = s
}
@@ -143,3 +160,26 @@ func (c *mcache) releaseAll() {
c.tiny = 0
c.tinyoffset = 0
}
// prepareForSweep flushes c if the system has entered a new sweep phase
// since c was populated. This must happen between the sweep phase
// starting and the first allocation from c.
func (c *mcache) prepareForSweep() {
// Alternatively, instead of making sure we do this on every P
// between starting the world and allocating on that P, we
// could leave allocate-black on, allow allocation to continue
// as usual, use a ragged barrier at the beginning of sweep to
// ensure all cached spans are swept, and then disable
// allocate-black. However, with this approach it's difficult
// to avoid spilling mark bits into the *next* GC cycle.
sg := mheap_.sweepgen
if c.flushGen == sg {
return
} else if c.flushGen != sg-2 {
println("bad flushGen", c.flushGen, "in prepareForSweep; sweepgen", sg)
throw("bad flushGen")
}
c.releaseAll()
stackcache_clear(c)
atomic.Store(&c.flushGen, mheap_.sweepgen) // Synchronizes with gcStart
}
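
For context, the flush is triggered when a P is wired up after
start-the-world. A simplified, hedged sketch of that call site (the
real acquirep in proc.go contains additional logic omitted here):

    // Simplified sketch; not the complete function.
    func acquirep(_p_ *p) {
        // Do the part that isn't allowed to have write barriers.
        wirep(_p_)

        // Perform a deferred mcache flush before this P can allocate
        // from a potentially stale mcache.
        _p_.mcache.prepareForSweep()

        // ... remaining acquirep work elided ...
    }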