mirror of https://github.com/golang/go.git
runtime: separate scavenged spans
This change adds a new treap to mheap which contains scavenged (i.e.
its physical pages were returned to the OS) spans. As of this change,
spans may no longer be partially scavenged.

For #14045.

Change-Id: I0d428a255c6d3f710b9214b378f841b997df0993
Reviewed-on: https://go-review.googlesource.com/c/139298
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 239341f3b6
commit b46bf0240c

3 changed files with 104 additions and 55 deletions
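For orientation before the diff, here is a minimal, self-contained sketch of the allocation and free policy this change introduces. It is not the runtime's code: span, heap, and bestFit below are illustrative stand-ins for mspan, mheap, and the treaps keyed by page count, and a linear best-fit search over slices stands in for treap lookup.

package main

import "fmt"

// span is a simplified stand-in for the runtime's mspan: a run of pages
// that is either fully mapped or fully returned ("scavenged") to the OS,
// since this commit removes partial scavenging.
type span struct {
	npages    uintptr
	scavenged bool
}

// heap mirrors the shape of mheap after this change: two pools of free
// spans instead of one. Slices stand in for the runtime's treaps.
type heap struct {
	free []*span // free and not scavenged
	scav []*span // free and scavenged
}

// bestFit removes and returns the smallest span with at least npages,
// or nil. (The runtime does this with a treap keyed by npages.)
func bestFit(pool *[]*span, npages uintptr) *span {
	best := -1
	for i, s := range *pool {
		if s.npages >= npages && (best < 0 || s.npages < (*pool)[best].npages) {
			best = i
		}
	}
	if best < 0 {
		return nil
	}
	s := (*pool)[best]
	*pool = append((*pool)[:best], (*pool)[best+1:]...)
	return s
}

// alloc follows the order allocSpanLocked uses in this commit:
// free spans first, then scavenged spans, then grow the heap and retry.
func (h *heap) alloc(npages uintptr) *span {
	if s := bestFit(&h.free, npages); s != nil {
		return s
	}
	if s := bestFit(&h.scav, npages); s != nil {
		return s
	}
	// Grow: pretend the OS gave us a fresh (unscavenged) span.
	h.free = append(h.free, &span{npages: npages})
	return bestFit(&h.free, npages)
}

// release returns a span to the heap, filing it under the pool that
// matches its scavenged state, as freeSpanLocked now does.
func (h *heap) release(s *span) {
	if s.scavenged {
		h.scav = append(h.scav, s)
	} else {
		h.free = append(h.free, s)
	}
}

func main() {
	h := &heap{}
	h.release(&span{npages: 4})
	h.release(&span{npages: 8, scavenged: true})
	s := h.alloc(6) // the 4-page span is too small, so the scavenged 8-page span is used
	fmt.Println(s.npages, s.scavenged) // 8 true
}

The point of the split is that allocation can prefer spans whose pages are still mapped and fall back to scavenged memory only when nothing else fits.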
--- a/src/runtime/mgclarge.go
+++ b/src/runtime/mgclarge.go
@@ -211,7 +211,6 @@ func (root *mTreap) removeNode(t *treapNode) {
 	if t.spanKey.npages != t.npagesKey {
 		throw("span and treap node npages do not match")
 	}
-
 	// Rotate t down to be leaf of tree for removal, respecting priorities.
 	for t.right != nil || t.left != nil {
 		if t.right == nil || t.left != nil && t.left.priority < t.right.priority {
@@ -281,17 +280,6 @@ func (root *mTreap) removeSpan(span *mspan) {
 	root.removeNode(t)
 }
 
-// scavengetreap visits each node in the treap and scavenges the
-// treapNode's span.
-func scavengetreap(treap *treapNode, now, limit uint64) uintptr {
-	if treap == nil {
-		return 0
-	}
-	return scavengeTreapNode(treap, now, limit) +
-		scavengetreap(treap.left, now, limit) +
-		scavengetreap(treap.right, now, limit)
-}
-
 // rotateLeft rotates the tree rooted at node x.
 // turning (x a (y b c)) into (y (x a b) c).
 func (root *mTreap) rotateLeft(x *treapNode) {
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -30,7 +30,8 @@ const minPhysPageSize = 4096
 //go:notinheap
 type mheap struct {
 	lock      mutex
-	free      mTreap    // free treap of spans
+	free      mTreap    // free and non-scavenged spans
+	scav      mTreap    // free and scavenged spans
 	busy      mSpanList // busy list of spans
 	sweepgen  uint32    // sweep generation, see comment in mspan
 	sweepdone uint32    // all spans are swept
@@ -60,7 +61,7 @@ type mheap struct {
 	// on the swept stack.
 	sweepSpans [2]gcSweepBuf
 
-	//_ uint32 // align uint64 fields on 32-bit for atomics
+	_ uint32 // align uint64 fields on 32-bit for atomics
 
 	// Proportional sweep
 	//
@@ -132,7 +133,7 @@ type mheap struct {
 	// (the actual arenas). This is only used on 32-bit.
 	arena linearAlloc
 
-	//_ uint32 // ensure 64-bit alignment of central
+	// _ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
 	// the padding makes sure that the MCentrals are
@@ -840,18 +841,31 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
 	var s *mspan
 
-	// Best fit in the treap of spans.
+	// First, attempt to allocate from free spans, then from
+	// scavenged spans, looking for best fit in each.
 	s = h.free.remove(npage)
-	if s == nil {
-		if !h.grow(npage) {
-			return nil
-		}
-		s = h.free.remove(npage)
-		if s == nil {
-			return nil
-		}
+	if s != nil {
+		goto HaveSpan
 	}
+	s = h.scav.remove(npage)
+	if s != nil {
+		goto HaveSpan
+	}
+	// On failure, grow the heap and try again.
+	if !h.grow(npage) {
+		return nil
+	}
+	s = h.free.remove(npage)
+	if s != nil {
+		goto HaveSpan
+	}
+	s = h.scav.remove(npage)
+	if s != nil {
+		goto HaveSpan
+	}
+	return nil
 
+HaveSpan:
 	// Mark span in use.
 	if s.state != mSpanFree {
 		throw("MHeap_AllocLocked - MSpan not free")
@@ -1002,19 +1016,29 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	if unusedsince == 0 {
 		s.unusedsince = nanotime()
 	}
-	s.npreleased = 0
+
+	// We scavenge s at the end after coalescing if s or anything
+	// it merged with is marked scavenged.
+	needsScavenge := s.npreleased != 0
+	prescavenged := s.npreleased * pageSize // number of bytes already scavenged.
 
 	// Coalesce with earlier, later spans.
 	if before := spanOf(s.base() - 1); before != nil && before.state == mSpanFree {
 		// Now adjust s.
 		s.startAddr = before.startAddr
 		s.npages += before.npages
-		s.npreleased = before.npreleased // absorb released pages
 		s.needzero |= before.needzero
 		h.setSpan(before.base(), s)
+		s.npreleased += before.npreleased // absorb released pages
 		// The size is potentially changing so the treap needs to delete adjacent nodes and
 		// insert back as a combined node.
-		h.free.removeSpan(before)
+		if before.npreleased == 0 {
+			h.free.removeSpan(before)
+		} else {
+			h.scav.removeSpan(before)
+			needsScavenge = true
+			prescavenged += before.npreleased * pageSize
+		}
 		before.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(before))
 	}
@@ -1022,26 +1046,77 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	// Now check to see if next (greater addresses) span is free and can be coalesced.
 	if after := spanOf(s.base() + s.npages*pageSize); after != nil && after.state == mSpanFree {
 		s.npages += after.npages
-		s.npreleased += after.npreleased
 		s.needzero |= after.needzero
 		h.setSpan(s.base()+s.npages*pageSize-1, s)
-		h.free.removeSpan(after)
+		if after.npreleased == 0 {
+			h.free.removeSpan(after)
+		} else {
+			h.scav.removeSpan(after)
+			needsScavenge = true
+			prescavenged += after.npreleased * pageSize
+		}
+		s.npreleased += after.npreleased
 		after.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(after))
 	}
 
-	// Insert s into the free treap.
-	h.free.insert(s)
+	if needsScavenge {
+		// When coalescing spans, some physical pages which
+		// were not returned to the OS previously because
+		// they were only partially covered by the span suddenly
+		// become available for scavenging. We want to make sure
+		// those holes are filled in, and the span is properly
+		// scavenged. Rather than trying to detect those holes
+		// directly, we collect how many bytes were already
+		// scavenged above and subtract that from heap_released
+		// before re-scavenging the entire newly-coalesced span,
+		// which will implicitly bump up heap_released.
+		memstats.heap_released -= uint64(prescavenged)
+		s.scavenge()
+	}
+
+	// Insert s into the appropriate treap.
+	if s.npreleased != 0 {
+		h.scav.insert(s)
+	} else {
+		h.free.insert(s)
+	}
 }
 
-func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
-	s := t.spanKey
-	if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
-		if released := s.scavenge(); released != 0 {
-			return released
-		}
+// scavengeAll visits each node in the unscav treap and scavenges the
+// treapNode's span. It then removes the scavenged span from
+// unscav and adds it into scav before continuing. h must be locked.
+func (h *mheap) scavengeAll(now, limit uint64) uintptr {
+	// Compute the left-most child in unscav to start iteration from.
+	t := h.free.treap
+	if t == nil {
+		return 0
 	}
-	return 0
+	for t.left != nil {
+		t = t.left
+	}
+	// Iterate over the treap be computing t's successor before
+	// potentially scavenging it.
+	released := uintptr(0)
+	for t != nil {
+		s := t.spanKey
+		next := t.succ()
+		if (now-uint64(s.unusedsince)) > limit {
+			r := s.scavenge()
+			if r != 0 {
+				// If we ended up scavenging s, then remove it from unscav
+				// and add it to scav. This is safe to do since we've already
+				// moved to t's successor.
+				h.free.removeNode(t)
+				h.scav.insert(s)
+				released += r
+			}
+		}
+		// Move t forward to its successor to iterate over the whole
+		// treap.
+		t = next
+	}
+	return released
 }
 
 func (h *mheap) scavenge(k int32, now, limit uint64) {
@@ -1051,13 +1126,13 @@ func (h *mheap) scavenge(k int32, now, limit uint64) {
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
-	sumreleased := scavengetreap(h.free.treap, now, limit)
+	released := h.scavengeAll(now, limit)
 	unlock(&h.lock)
 	gp.m.mallocing--
 
 	if debug.gctrace > 0 {
-		if sumreleased > 0 {
-			print("scvg", k, ": ", sumreleased>>20, " MB released\n")
+		if released > 0 {
+			print("scvg", k, ": ", released>>20, " MB released\n")
 		}
 		print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
 	}
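The scavengeAll added above walks the unscav (free) treap in order, computing each node's successor with t.succ() before it possibly removes that node and moves the span into scav, so the removal cannot invalidate the walk. The same compute-next-before-removing pattern is sketched below with container/list standing in for the treap; this is an analogy, not runtime code, and the integer values merely play the role of bytes released by scavenging a span.

package main

import (
	"container/list"
	"fmt"
)

func main() {
	// unscav and scav stand in for h.free and h.scav.
	unscav := list.New()
	scav := list.New()
	for _, v := range []int{5, 0, 3, 0, 7} {
		unscav.PushBack(v)
	}

	released := 0
	for e := unscav.Front(); e != nil; {
		next := e.Next() // like t.succ(): grab the successor first...
		if r := e.Value.(int); r != 0 {
			unscav.Remove(e) // ...so removing e cannot break the walk
			scav.PushBack(e.Value)
			released += r
		}
		e = next
	}
	fmt.Println(released, unscav.Len(), scav.Len()) // 15 2 3
}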
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -42,20 +42,6 @@ type mstats struct {
 	heap_released uint64 // bytes released to the os
 	heap_objects  uint64 // total number of allocated objects
 
-	// TODO(austin): heap_released is both useless and inaccurate
-	// in its current form. It's useless because, from the user's
-	// and OS's perspectives, there's no difference between a page
-	// that has not yet been faulted in and a page that has been
-	// released back to the OS. We could fix this by considering
-	// newly mapped spans to be "released". It's inaccurate
-	// because when we split a large span for allocation, we
-	// "unrelease" all pages in the large span and not just the
-	// ones we split off for use. This is trickier to fix because
-	// we currently don't know which pages of a span we've
-	// released. We could fix it by separating "free" and
-	// "released" spans, but then we have to allocate from runs of
-	// free and released spans.
-
 	// Statistics about allocation of low-level fixed-size structures.
 	// Protected by FixAlloc locks.
 	stacks_inuse uint64 // bytes in manually-managed stack spans
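Finally, a worked example of the heap_released bookkeeping that freeSpanLocked performs above, using illustrative numbers that are not from the commit: when a freed span coalesces with an already-scavenged neighbor, the bytes that were already counted as released (prescavenged) are subtracted before the merged span is re-scavenged, so nothing is counted twice. The 8 KiB pageSize matches the runtime's page size; everything else is made up for the example.

package main

import "fmt"

func main() {
	const pageSize = 8192 // the runtime's 8 KiB pages; the rest is illustrative

	// A freed 4-page span coalesces with a neighboring 6-page span that
	// was already scavenged, so 6 pages' worth of heap_released is
	// already on the books.
	heapReleased := uint64(6 * pageSize)
	prescavenged := uint64(6 * pageSize)

	// Per the comment in freeSpanLocked: drop the old credit, then
	// re-scavenge the whole 10-page coalesced span, which re-adds its
	// full size to heap_released.
	heapReleased -= prescavenged
	heapReleased += 10 * pageSize // what s.scavenge() would account for the merged span

	fmt.Println(heapReleased == 10*pageSize) // true: counted once, not twice
}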