diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 9ff4259ce93..14748a43f12 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -12,7 +12,7 @@ type mcache struct {
 	// The following members are accessed on every malloc,
 	// so they are grouped here for better caching.
 	next_sample      int32  // trigger heap sample after allocating this many bytes
-	local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
+	local_cachealloc intptr // bytes allocated from cache since last lock of heap
 	// Allocator cache for tiny objects w/o pointers.
 	// See "Tiny allocator" comment in malloc.go.
 	tiny             unsafe.Pointer
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 8e573aba5d8..865804470df 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -76,28 +76,25 @@
 // Are things on the free lists black or white? How does the sweep phase work?

 // Concurrent sweep.
+//
 // The sweep phase proceeds concurrently with normal program execution.
 // The heap is swept span-by-span both lazily (when a goroutine needs another span)
 // and concurrently in a background goroutine (this helps programs that are not CPU bound).
-// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
-// and so next_gc calculation is tricky and happens as follows.
-// At the end of the stop-the-world phase next_gc is conservatively set based on total
-// heap size; all spans are marked as "needs sweeping".
-// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
-// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
-// closer to the target value. However, this is not enough to avoid over-allocating memory.
-// Consider that a goroutine wants to allocate a new span for a large object and
-// there are no free swept spans, but there are small-object unswept spans.
-// If the goroutine naively allocates a new span, it can surpass the yet-unknown
-// target next_gc value. In order to prevent such cases (1) when a goroutine needs
-// to allocate a new small-object span, it sweeps small-object spans for the same
-// object size until it frees at least one object; (2) when a goroutine needs to
-// allocate large-object span from heap, it sweeps spans until it frees at least
-// that many pages into heap. Together these two measures ensure that we don't surpass
-// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
-// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
-// but there can still be other one-page unswept spans which could be combined into a
-// two-page span.
+// At the end of STW mark termination, all spans are marked as "needs sweeping".
+//
+// The background sweeper goroutine simply sweeps spans one-by-one.
+//
+// To avoid requesting more OS memory while there are unswept spans, when a
+// goroutine needs another span, it first attempts to reclaim that much memory
+// by sweeping. When a goroutine needs to allocate a new small-object span, it
+// sweeps small-object spans for the same object size until it frees at least
+// one object. When a goroutine needs to allocate a large-object span from the
+// heap, it sweeps spans until it frees at least that many pages into the
+// heap. There is one case where this may not suffice: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a
+// new two-page span, but there can still be other one-page unswept spans
+// which could be combined into a two-page span.
+//
 // It's critical to ensure that no operations proceed on unswept spans (that would corrupt
 // mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
 // so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
@@ -194,11 +191,11 @@ var triggerratio = int64(8)
 // have sufficient time to complete then more memory will be
 // requested from the OS increasing heap size thus allow future
 // GCs more time to complete.
-// memstat.heap_alloc and memstat.next_gc reads have benign races
+// The memstats.heap_live read has a benign race.
 // A false negative simple does not start a GC, a false positive
 // will start a GC needlessly. Neither have correctness issues.
 func shouldtriggergc() bool {
-	return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_alloc)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
+	return triggerratio*(int64(memstats.next_gc)-int64(memstats.heap_live)) <= int64(memstats.next_gc) && atomicloaduint(&bggc.working) == 0
 }

 var work struct {
@@ -322,7 +319,7 @@ func gc(mode int) {
 	if debug.gctrace > 0 {
 		stwprocs, maxprocs = gcprocs(), gomaxprocs
 		tSweepTerm = nanotime()
-		heap0 = memstats.heap_alloc
+		heap0 = memstats.heap_live
 	}

 	if trace.enabled {
@@ -401,7 +398,7 @@ func gc(mode int) {
 	gcphase = _GCmarktermination

 	if debug.gctrace > 0 {
-		heap1 = memstats.heap_alloc
+		heap1 = memstats.heap_live
 	}

 	startTime := nanotime()
@@ -593,15 +590,16 @@ func gcMark(start_time int64) {
 	shrinkfinish()

 	cachestats()
-	// next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
-	// conservatively set next_gc to high value assuming that everything is live
-	// concurrent/lazy sweep will reduce this number while discovering new garbage
-	memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100
+
+	// Compute next_gc from the heap just marked live by this GC.
+	memstats.heap_live = work.bytesMarked
+	memstats.next_gc = memstats.heap_live + memstats.heap_live*uint64(gcpercent)/100
 	if memstats.next_gc < heapminimum {
 		memstats.next_gc = heapminimum
 	}

 	if trace.enabled {
+		traceHeapAlloc()
 		traceNextGC()
 	}

diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index d72ef3aa366..a651e03105c 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -253,12 +253,6 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
 		}
 		c.local_nlargefree++
 		c.local_largefree += size
-		reduction := int64(size) * int64(gcpercent+100) / 100
-		if int64(memstats.next_gc)-reduction > int64(heapminimum) {
-			xadd64(&memstats.next_gc, -reduction)
-		} else {
-			atomicstore64(&memstats.next_gc, heapminimum)
-		}
 		res = true
 	} else {
 		// Free small object.
@@ -294,13 +288,6 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
 		}
 		if nfree > 0 {
 			c.local_nsmallfree[cl] += uintptr(nfree)
-			c.local_cachealloc -= intptr(uintptr(nfree) * size)
-			reduction := int64(nfree) * int64(size) * int64(gcpercent+100) / 100
-			if int64(memstats.next_gc)-reduction > int64(heapminimum) {
-				xadd64(&memstats.next_gc, -reduction)
-			} else {
-				atomicstore64(&memstats.next_gc, heapminimum)
-			}
 			res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head, end, preserve)
 			// MCentral_FreeSpan updates sweepgen
 		}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index e94b79fb8f4..4a023e5624c 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -372,7 +372,7 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan
 	}

 	// transfer stats from cache to global
-	memstats.heap_alloc += uint64(_g_.m.mcache.local_cachealloc)
+	memstats.heap_live += uint64(_g_.m.mcache.local_cachealloc)
 	_g_.m.mcache.local_cachealloc = 0
 	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
 	_g_.m.mcache.local_tinyallocs = 0
@@ -402,7 +402,7 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan
 	// update stats, sweep lists
 	if large {
 		memstats.heap_objects++
-		memstats.heap_alloc += uint64(npage << _PageShift)
+		memstats.heap_live += uint64(npage << _PageShift)
 		// Swept spans are at the end of lists.
 		if s.npages < uintptr(len(h.free)) {
 			mSpanList_InsertBack(&h.busy[s.npages], s)
@@ -628,12 +628,11 @@ func mHeap_Free(h *mheap, s *mspan, acct int32) {
 	systemstack(func() {
 		mp := getg().m
 		lock(&h.lock)
-		memstats.heap_alloc += uint64(mp.mcache.local_cachealloc)
+		memstats.heap_live += uint64(mp.mcache.local_cachealloc)
 		mp.mcache.local_cachealloc = 0
 		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
 		mp.mcache.local_tinyallocs = 0
 		if acct != 0 {
-			memstats.heap_alloc -= uint64(s.npages << _PageShift)
 			memstats.heap_objects--
 		}
 		mHeap_FreeSpanLocked(h, s, true, true, 0)
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index d2e89510c13..568a2ba4a9f 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -59,7 +59,15 @@ type mstats struct {
 		nfree   uint64
 	}

+	// Statistics below here are not exported to Go directly.
+
 	tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
+
+	// heap_live is the number of bytes considered live by the GC.
+	// That is: retained by the most recent GC plus allocated
+	// since then. heap_live <= heap_alloc, since heap_live
+	// excludes unmarked objects that have not yet been swept.
+	heap_live uint64
 }

 var memstats mstats
@@ -317,7 +325,7 @@ func flushallmcaches() {
 func purgecachedstats(c *mcache) {
 	// Protected by either heap or GC lock.
 	h := &mheap_
-	memstats.heap_alloc += uint64(c.local_cachealloc)
+	memstats.heap_live += uint64(c.local_cachealloc)
 	c.local_cachealloc = 0
 	if trace.enabled {
 		traceHeapAlloc()
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 7c4d8d3c91e..51468507709 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -49,7 +49,7 @@ const (
 	traceEvGoSysBlock     = 30 // syscall blocks [timestamp]
 	traceEvGoWaiting      = 31 // denotes that goroutine is blocked when tracing starts [goroutine id]
 	traceEvGoInSyscall    = 32 // denotes that goroutine is in syscall when tracing starts [goroutine id]
-	traceEvHeapAlloc      = 33 // memstats.heap_alloc change [timestamp, heap_alloc]
+	traceEvHeapAlloc      = 33 // memstats.heap_live change [timestamp, heap_alloc]
 	traceEvNextGC         = 34 // memstats.next_gc change [timestamp, next_gc]
 	traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
 	traceEvFutileWakeup   = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
@@ -813,7 +813,7 @@ func traceGoSysBlock(pp *p) {
 }

 func traceHeapAlloc() {
-	traceEvent(traceEvHeapAlloc, -1, memstats.heap_alloc)
+	traceEvent(traceEvHeapAlloc, -1, memstats.heap_live)
 }

 func traceNextGC() {
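
Note on the trigger arithmetic in the shouldtriggergc hunk above: with triggerratio = 8, a GC is now requested once heap_live has grown to within 1/8 of the next_gc goal, i.e. at 7/8 of the goal. The following is an illustrative, standalone sketch of that comparison, not runtime code; shouldTrigger, nextGC, and heapLive are hypothetical stand-ins for triggerratio, memstats.next_gc, and memstats.heap_live, and the bggc.working check of the real function is omitted.

package main

import "fmt"

// triggerratio mirrors the runtime's triggerratio (8) from the diff above.
const triggerratio = 8

// shouldTrigger reproduces the comparison in shouldtriggergc:
// triggerratio*(next_gc - heap_live) <= next_gc, i.e. the live heap has grown
// to within 1/triggerratio of the goal. The real function also requires that
// no background GC is already running, which is left out here.
func shouldTrigger(nextGC, heapLive int64) bool {
	return triggerratio*(nextGC-heapLive) <= nextGC
}

func main() {
	const nextGC = 8 << 20                    // 8 MB goal
	fmt.Println(shouldTrigger(nextGC, 6<<20)) // false: below 7/8 of the goal
	fmt.Println(shouldTrigger(nextGC, 7<<20)) // true: at 7/8 of the goal
}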
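
Likewise, the next_gc computation that gcMark now performs from heap_live (work.bytesMarked) can be stated on its own. The sketch below is illustrative only, under the assumption that heapminimum is the runtime's 4 MB floor; nextGC is a hypothetical helper, not a runtime function.

package main

import "fmt"

// heapminimum is assumed here to be the runtime's 4 MB floor for next_gc.
const heapminimum = 4 << 20

// nextGC shows the computation gcMark now performs: grow the heap just marked
// live (work.bytesMarked, stored in heap_live) by gcpercent (GOGC) percent,
// never dropping below heapminimum.
func nextGC(heapLive uint64, gcpercent int) uint64 {
	goal := heapLive + heapLive*uint64(gcpercent)/100
	if goal < heapminimum {
		goal = heapminimum
	}
	return goal
}

func main() {
	fmt.Println(nextGC(10<<20, 100)) // 10 MB live, GOGC=100 -> 20 MB goal
	fmt.Println(nextGC(1<<20, 100))  // small live heap -> clamped to heapminimum
}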