// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Page heap.
//
// See malloc.go for overview.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
// but all the other global data is here too.
type mheap struct {
	lock      mutex
	free      [_MaxMHeapList]mSpanList // free lists of given length
	freelarge mSpanList                // free lists length >= _MaxMHeapList
	busy      [_MaxMHeapList]mSpanList // busy lists of large objects of given length
	busylarge mSpanList                // busy lists of large objects length >= _MaxMHeapList
	allspans  **mspan                  // all spans out there
	gcspans   **mspan                  // copy of allspans referenced by gc marker or sweeper
	nspan     uint32
	sweepgen  uint32 // sweep generation, see comment in mspan
	sweepdone uint32 // all spans are swept

	// span lookup
	spans        **mspan
	spans_mapped uintptr

	// Proportional sweep
	pagesInUse        uint64  // pages of spans in stats _MSpanInUse; R/W with mheap.lock
	spanBytesAlloc    uint64  // bytes of spans allocated this cycle; updated atomically
	pagesSwept        uint64  // pages swept this cycle; updated atomically
	sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
	// TODO(austin): pagesInUse should be a uintptr, but the 386
	// compiler can't 8-byte align fields.
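
	// Rough intent of these fields (a summary, not a normative spec):
	// span allocation keeps pagesSwept at least
	// sweepPagesPerByte*spanBytesAlloc within a cycle, so sweep work
	// is paid for up front as spans are allocated rather than left as
	// debt for the next GC.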

	// Malloc stats.
	largefree  uint64                  // bytes freed for large objects (>maxsmallsize)
	nlargefree uint64                  // number of frees for large objects (>maxsmallsize)
	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)

	// range of addresses we might see in the heap
	bitmap         uintptr
	bitmap_mapped  uintptr
	arena_start    uintptr
	arena_used     uintptr // always mHeap_Map{Bits,Spans} before updating
	arena_end      uintptr
	arena_reserved bool

	// central free lists for small size classes.
	// the padding makes sure that the MCentrals are
	// spaced CacheLineSize bytes apart, so that each MCentral.lock
	// gets its own cache line.
	central [_NumSizeClasses]struct {
		mcentral mcentral
		pad      [sys.CacheLineSize]byte
	}

	spanalloc             fixalloc // allocator for span*
	cachealloc            fixalloc // allocator for mcache*
	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
	specialprofilealloc   fixalloc // allocator for specialprofile*
	speciallock           mutex    // lock for special record allocators.
}

var mheap_ mheap

// An MSpan is a run of pages.
//
// When a MSpan is in the heap free list, state == MSpanFree
// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
//
// When a MSpan is allocated, state == MSpanInUse or MSpanStack
// and heapmap(i) == span for all s->start <= i < s->start+s->npages.

// Every MSpan is in one doubly-linked list,
// either one of the MHeap's free lists or one of the
// MCentral's span lists.

// An MSpan representing actual memory has state _MSpanInUse,
// _MSpanStack, or _MSpanFree. Transitions between these states are
// constrained as follows:
//
// * A span may transition from free to in-use or stack during any GC
//   phase.
//
// * During sweeping (gcphase == _GCoff), a span may transition from
//   in-use to free (as a result of sweeping) or stack to free (as a
//   result of stacks being freed).
//
// * During GC (gcphase != _GCoff), a span *must not* transition from
//   stack or in-use to free. Because concurrent GC may read a pointer
//   and then look up its span, the span state must be monotonic.
const (
	_MSpanInUse = iota // allocated for garbage collected heap
	_MSpanStack        // allocated for use by stack allocator
	_MSpanFree
	_MSpanDead
)

// mSpanList heads a linked list of spans.
//
// Linked list structure is based on BSD's "tail queue" data structure.
type mSpanList struct {
	first *mspan  // first span in list, or nil if none
	last  **mspan // last span's next field, or first if none
}
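
// Illustrative sketch (assuming the insertBack implementation defined
// elsewhere in this file): appending a span s to a list amounts to
//
//	*list.last = s      // link s after the current tail (or set first)
//	list.last = &s.next // the tail's next field is now s.next
//
// and an empty list is one whose last points at its own first field.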

type mspan struct {
	next *mspan     // next span in list, or nil if none
	prev **mspan    // previous span's next field, or list head's first field if none
	list *mSpanList // For debugging. TODO: Remove.

	start    pageID    // starting page number
	npages   uintptr   // number of pages in span
	freelist gclinkptr // list of free objects
	// sweep generation:
	// if sweepgen == h->sweepgen - 2, the span needs sweeping
	// if sweepgen == h->sweepgen - 1, the span is currently being swept
	// if sweepgen == h->sweepgen, the span is swept and ready to use
	// h->sweepgen is incremented by 2 after every GC
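	//
	// For example (illustrative only): if h->sweepgen is 6, a span
	// with sweepgen 4 is unswept, 5 means a sweeper has claimed it,
	// and 6 means it was swept this cycle.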
	sweepgen    uint32
	divMul      uint32   // for divide by elemsize - divMagic.mul
	ref         uint16   // capacity - number of objects in freelist
	sizeclass   uint8    // size class
	incache     bool     // being used by an mcache
	state       uint8    // mspaninuse etc
	needzero    uint8    // needs to be zeroed before allocation
	divShift    uint8    // for divide by elemsize - divMagic.shift
	divShift2   uint8    // for divide by elemsize - divMagic.shift2
	elemsize    uintptr  // computed from sizeclass or from npages
	unusedsince int64    // first time spotted by gc in mspanfree state
	npreleased  uintptr  // number of pages released to the os
	limit       uintptr  // end of data in span
	speciallock mutex    // guards specials list
	specials    *special // linked list of special records sorted by offset.
	baseMask    uintptr  // if non-0, elemsize is a power of 2, & this will get object allocation base
}

func (s *mspan) base() uintptr {
	return uintptr(s.start << _PageShift)
}

func (s *mspan) layout() (size, n, total uintptr) {
	total = s.npages << _PageShift
	size = s.elemsize
	if size > 0 {
		n = total / size
	}
	return
}

var h_allspans []*mspan // TODO: make this h.allspans once mheap can be defined in Go

// h_spans is a lookup table to map virtual address page IDs to *mspan.
// For allocated spans, their pages map to the span itself.
// For free spans, only the lowest and highest pages map to the span itself. Internal
// pages map to an arbitrary span.
// For pages that have never been allocated, h_spans entries are nil.
var h_spans []*mspan // TODO: make this h.spans once mheap can be defined in Go
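
// The h_spans index for an address p in the arena is
// (p-mheap_.arena_start)>>_PageShift; see spanOfUnchecked below.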

func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
	h := (*mheap)(vh)
	s := (*mspan)(p)
	if len(h_allspans) >= cap(h_allspans) {
		n := 64 * 1024 / sys.PtrSize
		if n < cap(h_allspans)*3/2 {
			n = cap(h_allspans) * 3 / 2
		}
		var new []*mspan
		sp := (*slice)(unsafe.Pointer(&new))
		sp.array = sysAlloc(uintptr(n)*sys.PtrSize, &memstats.other_sys)
		if sp.array == nil {
			throw("runtime: cannot allocate memory")
		}
		sp.len = len(h_allspans)
		sp.cap = n
		if len(h_allspans) > 0 {
			copy(new, h_allspans)
			// Don't free the old array if it's referenced by sweep.
			// See the comment in mgc.go.
			if h.allspans != mheap_.gcspans {
				sysFree(unsafe.Pointer(h.allspans), uintptr(cap(h_allspans))*sys.PtrSize, &memstats.other_sys)
			}
		}
		h_allspans = new
		h.allspans = (**mspan)(unsafe.Pointer(sp.array))
	}
	h_allspans = append(h_allspans, s)
	h.nspan = uint32(len(h_allspans))
}

// inheap reports whether b is a pointer into a (potentially dead) heap object.
// It returns false for pointers into stack spans.
// Non-preemptible because it is used by write barriers.
//go:nowritebarrier
//go:nosplit
func inheap(b uintptr) bool {
	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
		return false
	}
	// Not a beginning of a block, consult span table to find the block beginning.
	k := b >> _PageShift
	x := k
	x -= mheap_.arena_start >> _PageShift
	s := h_spans[x]
	if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
		return false
	}
	return true
}

// TODO: spanOf and spanOfUnchecked are open-coded in a lot of places.
// Use the functions instead.

// spanOf returns the span of p. If p does not point into the heap or
// no span contains p, spanOf returns nil.
func spanOf(p uintptr) *mspan {
	if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used {
		return nil
	}
	return spanOfUnchecked(p)
}

// spanOfUnchecked is equivalent to spanOf, but the caller must ensure
// that p points into the heap (that is, mheap_.arena_start <= p <
// mheap_.arena_used).
func spanOfUnchecked(p uintptr) *mspan {
	return h_spans[(p-mheap_.arena_start)>>_PageShift]
}
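
// Usage sketch (illustrative only): for a pointer p into a small-object
// span, with mheap_.arena_start <= p < mheap_.arena_used,
//
//	s := spanOfUnchecked(p)
//	base := s.base() + (p-s.base())/s.elemsize*s.elemsize
//
// recovers the base of the object containing p, much as mlookup does
// below.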

func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
	_g_ := getg()

	_g_.m.mcache.local_nlookup++
	if sys.PtrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
		// purge cache stats to prevent overflow
		lock(&mheap_.lock)
		purgecachedstats(_g_.m.mcache)
		unlock(&mheap_.lock)
	}

	s := mheap_.lookupMaybe(unsafe.Pointer(v))
	if sp != nil {
		*sp = s
	}
	if s == nil {
		if base != nil {
			*base = 0
		}
		if size != nil {
			*size = 0
		}
		return 0
	}

	p := uintptr(s.start) << _PageShift
	if s.sizeclass == 0 {
		// Large object.
		if base != nil {
			*base = p
		}
		if size != nil {
			*size = s.npages << _PageShift
		}
		return 1
	}

	n := s.elemsize
	if base != nil {
		i := (uintptr(v) - uintptr(p)) / n
		*base = p + i*n
	}
	if size != nil {
		*size = n
	}

	return 1
}

// Initialize the heap.
func (h *mheap) init(spans_size uintptr) {
	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)

	// h->mapcache needs no init
	for i := range h.free {
		h.free[i].init()
		h.busy[i].init()
	}

	h.freelarge.init()
	h.busylarge.init()
	for i := range h.central {
		h.central[i].mcentral.init(int32(i))
	}

	sp := (*slice)(unsafe.Pointer(&h_spans))
	sp.array = unsafe.Pointer(h.spans)
	sp.len = int(spans_size / sys.PtrSize)
	sp.cap = int(spans_size / sys.PtrSize)
}

// mHeap_MapSpans makes sure that the spans are mapped
// up to the new value of arena_used.
//
// It must be called with the expected new value of arena_used,
// *before* h.arena_used has been updated.
// Waiting to update arena_used until after the memory has been mapped
// avoids faults when other threads try to access the bitmap immediately
// after observing the change to arena_used.
func (h *mheap) mapSpans(arena_used uintptr) {
	// Map spans array, PageSize at a time.
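	// Each arena page needs one *mspan entry, so the table needs
	// (arena_used-arena_start)/_PageSize*sys.PtrSize bytes, rounded
	// up to a physical page.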
	n := arena_used
	n -= h.arena_start
	n = n / _PageSize * sys.PtrSize
	n = round(n, sys.PhysPageSize)
	if h.spans_mapped >= n {
		return
	}
	sysMap(add(unsafe.Pointer(h.spans), h.spans_mapped), n-h.spans_mapped, h.arena_reserved, &memstats.other_sys)
	h.spans_mapped = n
}

// Sweeps spans in list until it reclaims at least npages into heap.
// Returns the actual number of pages reclaimed.
func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
	n := uintptr(0)
	sg := mheap_.sweepgen
retry:
	for s := list.first; s != nil; s = s.next {
		if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
			list.remove(s)
			// swept spans are at the end of the list
			list.insertBack(s)
			unlock(&h.lock)
			snpages := s.npages
			if s.sweep(false) {
				n += snpages
			}
			lock(&h.lock)
			if n >= npages {
				return n
			}
			// the span could have been moved elsewhere
			goto retry
		}
		if s.sweepgen == sg-1 {
			// the span is being swept by background sweeper, skip
			continue
		}
		// already swept empty span,
		// all subsequent ones must also be either swept or in process of sweeping
		break
	}
	return n
}

// Sweeps and reclaims at least npage pages into heap.
// Called before allocating npage pages.
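// The heap lock must be held on entry; reclaim temporarily releases
// it while sweeping.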
func (h *mheap) reclaim(npage uintptr) {
	// First try to sweep busy spans with large objects of size >= npage,
	// this has good chances of reclaiming the necessary space.
	for i := int(npage); i < len(h.busy); i++ {
		if h.reclaimList(&h.busy[i], npage) != 0 {
			return // Bingo!
		}
	}

	// Then -- even larger objects.
	if h.reclaimList(&h.busylarge, npage) != 0 {
		return // Bingo!
	}

	// Now try smaller objects.
	// One such object is not enough, so we need to reclaim several of them.
	reclaimed := uintptr(0)
	for i := 0; i < int(npage) && i < len(h.busy); i++ {
		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
		if reclaimed >= npage {
			return
		}
	}

	// Now sweep everything that is not yet swept.
	unlock(&h.lock)
	for {
		n := sweepone()
		if n == ^uintptr(0) { // all spans are swept
			break
		}
		reclaimed += n
		if reclaimed >= npage {
			break
		}
	}
	lock(&h.lock)
}

// Allocate a new span of npage pages from the heap for GC'd memory
// and record its size class in the HeapMap and HeapMapCache.
func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
	_g_ := getg()
	if _g_ != _g_.m.g0 {
		throw("_mheap_alloc not on g0 stack")
	}
	lock(&h.lock)

	// To prevent excessive heap growth, before allocating n pages
	// we need to sweep and reclaim at least n pages.
	if h.sweepdone == 0 {
		// TODO(austin): This tends to sweep a large number of
		// spans in order to find a few completely free spans
		// (for example, in the garbage benchmark, this sweeps
		// ~30x the number of pages it's trying to allocate).
		// If GC kept a bit for whether there were any marks
		// in a span, we could release these free spans
		// at the end of GC and eliminate this entirely.
		h.reclaim(npage)
	}

	// transfer stats from cache to global
	memstats.heap_live += uint64(_g_.m.mcache.local_cachealloc)
	_g_.m.mcache.local_cachealloc = 0
	memstats.heap_scan += uint64(_g_.m.mcache.local_scan)
	_g_.m.mcache.local_scan = 0
	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
	_g_.m.mcache.local_tinyallocs = 0

	s := h.allocSpanLocked(npage)
	if s != nil {
		// Record span info, because gc needs to be
		// able to map interior pointer to containing span.
		atomic.Store(&s.sweepgen, h.sweepgen)
		s.state = _MSpanInUse
		s.freelist = 0
		s.ref = 0
		s.sizeclass = uint8(sizeclass)
		if sizeclass == 0 {
			s.elemsize = s.npages << _PageShift
			s.divShift = 0
			s.divMul = 0
			s.divShift2 = 0
			s.baseMask = 0
		} else {
			s.elemsize = uintptr(class_to_size[sizeclass])
			m := &class_to_divmagic[sizeclass]
			s.divShift = m.shift
			s.divMul = m.mul
			s.divShift2 = m.shift2
			s.baseMask = m.baseMask
		}

		// update stats, sweep lists
		h.pagesInUse += uint64(npage)
		if large {
			memstats.heap_objects++
			memstats.heap_live += uint64(npage << _PageShift)
			// Swept spans are at the end of lists.
			if s.npages < uintptr(len(h.free)) {
				h.busy[s.npages].insertBack(s)
			} else {
				h.busylarge.insertBack(s)
			}
		}
	}
	// heap_scan and heap_live were updated.
	if gcBlackenEnabled != 0 {
		gcController.revise()
	}

	if trace.enabled {
		traceHeapAlloc()
	}

	// h_spans is accessed concurrently without synchronization
	// from other threads. Hence, there must be a store/store
	// barrier here to ensure the writes to h_spans above happen
	// before the caller can publish a pointer p to an object
	// allocated from s. As soon as this happens, the garbage
	// collector running on another processor could read p and
	// look up s in h_spans. The unlock acts as the barrier to
	// order these writes. On the read side, the data dependency
	// between p and the index in h_spans orders the reads.
	unlock(&h.lock)
	return s
}

func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
	// Don't do any operations that lock the heap on the G stack.
	// It might trigger stack growth, and the stack growth code needs
	// to be able to allocate heap.
	var s *mspan
	systemstack(func() {
		s = h.alloc_m(npage, sizeclass, large)
	})

	if s != nil {
		if needzero && s.needzero != 0 {
			memclr(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
		}
		s.needzero = 0
	}
	return s
}

func (h *mheap) allocStack(npage uintptr) *mspan {
	_g_ := getg()
	if _g_ != _g_.m.g0 {
		throw("mheap_allocstack not on g0 stack")
	}
	lock(&h.lock)
	s := h.allocSpanLocked(npage)
	if s != nil {
		s.state = _MSpanStack
		s.freelist = 0
		s.ref = 0
		memstats.stacks_inuse += uint64(s.npages << _PageShift)
	}

	// This unlock acts as a release barrier. See mHeap_Alloc_m.
	unlock(&h.lock)
	return s
}

// Allocates a span of the given size. h must be locked.
// The returned span has been removed from the
// free list, but its state is still MSpanFree.
func (h *mheap) allocSpanLocked(npage uintptr) *mspan {
	var list *mSpanList
	var s *mspan

	// Try in fixed-size lists up to max.
	for i := int(npage); i < len(h.free); i++ {
		list = &h.free[i]
		if !list.isEmpty() {
			s = list.first
			goto HaveSpan
		}
	}

	// Best fit in list of large spans.
	list = &h.freelarge
	s = h.allocLarge(npage)
	if s == nil {
		if !h.grow(npage) {
			return nil
		}
		s = h.allocLarge(npage)
		if s == nil {
			return nil
		}
	}

HaveSpan:
	// Mark span in use.
	if s.state != _MSpanFree {
		throw("MHeap_AllocLocked - MSpan not free")
	}
	if s.npages < npage {
		throw("MHeap_AllocLocked - bad npages")
	}
	list.remove(s)
	if s.inList() {
		throw("still in list")
	}
	if s.npreleased > 0 {
		sysUsed(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
		memstats.heap_released -= uint64(s.npreleased << _PageShift)
		s.npreleased = 0
	}

	if s.npages > npage {
		// Trim extra and put it back in the heap.
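		// The span-table entries at the boundary pages are updated
		// below so lookups keep working: for free spans, only the
		// first and last pages are guaranteed to map to the span
		// (see the h_spans comment above).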
		t := (*mspan)(h.spanalloc.alloc())
		t.init(s.start+pageID(npage), s.npages-npage)
		s.npages = npage
		p := uintptr(t.start)
		p -= (h.arena_start >> _PageShift)
		if p > 0 {
			h_spans[p-1] = s
		}
		h_spans[p] = t
		h_spans[p+t.npages-1] = t
		t.needzero = s.needzero
		s.state = _MSpanStack // prevent coalescing with s
		t.state = _MSpanStack
		h.freeSpanLocked(t, false, false, s.unusedsince)
		s.state = _MSpanFree
	}
	s.unusedsince = 0

	p := uintptr(s.start)
	p -= (h.arena_start >> _PageShift)
	for n := uintptr(0); n < npage; n++ {
		h_spans[p+n] = s
	}

	memstats.heap_inuse += uint64(npage << _PageShift)
	memstats.heap_idle -= uint64(npage << _PageShift)

	//println("spanalloc", hex(s.start<<_PageShift))
	if s.inList() {
		throw("still in list")
	}
	return s
}

// Allocate a span of at least npage pages from the list of large spans.
func (h *mheap) allocLarge(npage uintptr) *mspan {
	return bestFit(&h.freelarge, npage, nil)
}

// Search list for smallest span with >= npage pages.
// If there are multiple smallest spans, take the one
// with the earliest starting address.
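// This is a linear scan over the list.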
|
2015-10-15 15:59:49 -07:00
|
|
|
func bestFit(list *mSpanList, npage uintptr, best *mspan) *mspan {
|
|
|
|
|
for s := list.first; s != nil; s = s.next {
|
2014-11-11 17:05:02 -05:00
|
|
|
if s.npages < npage {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if best == nil || s.npages < best.npages || (s.npages == best.npages && s.start < best.start) {
|
|
|
|
|
best = s
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return best
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try to add at least npage pages of memory to the heap,
|
|
|
|
|
// returning whether it worked.
|
2015-09-26 12:31:59 -04:00
|
|
|
//
|
|
|
|
|
// h must be locked.
|
2015-11-11 16:13:51 -08:00
|
|
|
func (h *mheap) grow(npage uintptr) bool {
|
2014-11-11 17:05:02 -05:00
|
|
|
// Ask for a big chunk, to reduce the number of mappings
|
|
|
|
|
// the operating system needs to track; also amortizes
|
|
|
|
|
// the overhead of an operating system mapping.
|
|
|
|
|
// Allocate a multiple of 64kB.
|
|
|
|
|
npage = round(npage, (64<<10)/_PageSize)
|
|
|
|
|
ask := npage << _PageShift
|
|
|
|
|
if ask < _HeapAllocChunk {
|
|
|
|
|
ask = _HeapAllocChunk
|
|
|
|
|
}
|
|
|
|
|
|
2015-11-11 16:13:51 -08:00
|
|
|
v := h.sysAlloc(ask)
|
2014-11-11 17:05:02 -05:00
|
|
|
if v == nil {
|
|
|
|
|
if ask > npage<<_PageShift {
|
|
|
|
|
ask = npage << _PageShift
|
2015-11-11 16:13:51 -08:00
|
|
|
v = h.sysAlloc(ask)
|
2014-11-11 17:05:02 -05:00
|
|
|
}
|
|
|
|
|
if v == nil {
|
|
|
|
|
print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create a fake "in use" span and free it, so that the
|
|
|
|
|
// right coalescing happens.
|
2015-11-11 16:13:51 -08:00
|
|
|
s := (*mspan)(h.spanalloc.alloc())
|
|
|
|
|
s.init(pageID(uintptr(v)>>_PageShift), ask>>_PageShift)
|
2014-11-11 17:05:02 -05:00
|
|
|
p := uintptr(s.start)
|
2015-10-26 17:53:22 -07:00
|
|
|
p -= (h.arena_start >> _PageShift)
|
2015-02-24 09:25:09 -08:00
|
|
|
for i := p; i < p+s.npages; i++ {
|
|
|
|
|
h_spans[i] = s
|
|
|
|
|
}
|
2015-11-02 14:09:24 -05:00
|
|
|
atomic.Store(&s.sweepgen, h.sweepgen)
|
2014-11-11 17:05:02 -05:00
|
|
|
s.state = _MSpanInUse
|
2015-09-26 12:31:59 -04:00
|
|
|
h.pagesInUse += uint64(npage)
|
2015-11-11 16:13:51 -08:00
|
|
|
h.freeSpanLocked(s, false, true, 0)
|
2014-11-11 17:05:02 -05:00
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Look up the span at the given address.
|
|
|
|
|
// Address is guaranteed to be in map
|
|
|
|
|
// and is guaranteed to be start or end of span.
|
2015-11-11 16:13:51 -08:00
|
|
|
func (h *mheap) lookup(v unsafe.Pointer) *mspan {
|
2014-11-11 17:05:02 -05:00
|
|
|
p := uintptr(v)
|
2015-10-26 17:53:22 -07:00
|
|
|
p -= h.arena_start
|
2014-11-11 17:05:02 -05:00
|
|
|
return h_spans[p>>_PageShift]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Look up the span at the given address.
|
|
|
|
|
// Address is *not* guaranteed to be in map
|
|
|
|
|
// and may be anywhere in the span.
|
|
|
|
|
// Map entries for the middle of a span are only
|
|
|
|
|
// valid for allocated spans. Free spans may have
|
|
|
|
|
// other garbage in their middles, so we have to
|
|
|
|
|
// check for that.
|
2015-11-11 16:13:51 -08:00
|
|
|
func (h *mheap) lookupMaybe(v unsafe.Pointer) *mspan {
|
2015-10-26 17:53:22 -07:00
|
|
|
if uintptr(v) < h.arena_start || uintptr(v) >= h.arena_used {
|
2014-11-11 17:05:02 -05:00
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
p := uintptr(v) >> _PageShift
|
|
|
|
|
q := p
|
2015-10-26 17:53:22 -07:00
|
|
|
q -= h.arena_start >> _PageShift
|
2014-11-11 17:05:02 -05:00
|
|
|
s := h_spans[q]
|
|
|
|
|
if s == nil || p < uintptr(s.start) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
return s
|
|
|
|
|
}

// Free the span back into the heap.
func (h *mheap) freeSpan(s *mspan, acct int32) {
	systemstack(func() {
		mp := getg().m
		lock(&h.lock)
		memstats.heap_live += uint64(mp.mcache.local_cachealloc)
		mp.mcache.local_cachealloc = 0
		memstats.heap_scan += uint64(mp.mcache.local_scan)
		mp.mcache.local_scan = 0
		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
		mp.mcache.local_tinyallocs = 0
		if acct != 0 {
			memstats.heap_objects--
		}
		if gcBlackenEnabled != 0 {
			gcController.revise()
		}
		h.freeSpanLocked(s, true, true, 0)
		if trace.enabled {
			traceHeapAlloc()
		}
		unlock(&h.lock)
	})
}

func (h *mheap) freeStack(s *mspan) {
	_g_ := getg()
	if _g_ != _g_.m.g0 {
		throw("mheap_freestack not on g0 stack")
	}
	s.needzero = 1
	lock(&h.lock)
	memstats.stacks_inuse -= uint64(s.npages << _PageShift)
	h.freeSpanLocked(s, true, true, 0)
	unlock(&h.lock)
}

// s must be on a busy list (h.busy or h.busylarge) or unlinked.
func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
	switch s.state {
	case _MSpanStack:
		if s.ref != 0 {
			throw("MHeap_FreeSpanLocked - invalid stack free")
		}
	case _MSpanInUse:
		if s.ref != 0 || s.sweepgen != h.sweepgen {
			print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
			throw("MHeap_FreeSpanLocked - invalid free")
		}
		h.pagesInUse -= uint64(s.npages)
	default:
		throw("MHeap_FreeSpanLocked - invalid span state")
	}

	if acctinuse {
		memstats.heap_inuse -= uint64(s.npages << _PageShift)
	}
	if acctidle {
		memstats.heap_idle += uint64(s.npages << _PageShift)
	}
	s.state = _MSpanFree
	if s.inList() {
		h.busyList(s.npages).remove(s)
	}

	// Stamp newly unused spans. The scavenger will use that
	// info to potentially give back some pages to the OS.
	s.unusedsince = unusedsince
	if unusedsince == 0 {
		s.unusedsince = nanotime()
	}
	s.npreleased = 0

	// Coalesce with earlier, later spans.
	p := uintptr(s.start)
	p -= h.arena_start >> _PageShift
	if p > 0 {
		t := h_spans[p-1]
		if t != nil && t.state == _MSpanFree {
			s.start = t.start
			s.npages += t.npages
			s.npreleased = t.npreleased // absorb released pages
			s.needzero |= t.needzero
			p -= t.npages
			h_spans[p] = s
			h.freeList(t.npages).remove(t)
			t.state = _MSpanDead
			h.spanalloc.free(unsafe.Pointer(t))
		}
	}
	if (p+s.npages)*sys.PtrSize < h.spans_mapped {
		t := h_spans[p+s.npages]
		if t != nil && t.state == _MSpanFree {
			s.npages += t.npages
			s.npreleased += t.npreleased
			s.needzero |= t.needzero
			h_spans[p+s.npages-1] = s
			h.freeList(t.npages).remove(t)
			t.state = _MSpanDead
			h.spanalloc.free(unsafe.Pointer(t))
		}
	}

	// Insert s into appropriate list.
	h.freeList(s.npages).insert(s)
}
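
// freeSpanLocked coalesces the freed span with any free neighbor it can reach
// through h_spans: the entry for the page just before the span and the entry
// for the page just after it. The commented code below is a simplified,
// standalone model of that merge step (illustrative only, not runtime code);
// it tracks free page runs in two maps instead of the real span table.
//
//	type run struct{ start, npages uintptr }
//
//	// byStart[r.start] and byEnd[r.start+r.npages-1] both refer to r.
//	var (
//		byStart = map[uintptr]*run{}
//		byEnd   = map[uintptr]*run{}
//	)
//
//	func freeRun(start, npages uintptr) {
//		if t, ok := byEnd[start-1]; ok { // absorb the earlier free run
//			delete(byStart, t.start)
//			delete(byEnd, start-1)
//			start, npages = t.start, t.npages+npages
//		}
//		if t, ok := byStart[start+npages]; ok { // absorb the later free run
//			delete(byStart, t.start)
//			delete(byEnd, t.start+t.npages-1)
//			npages += t.npages
//		}
//		r := &run{start, npages}
//		byStart[start], byEnd[start+npages-1] = r, r
//	}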

func (h *mheap) freeList(npages uintptr) *mSpanList {
	if npages < uintptr(len(h.free)) {
		return &h.free[npages]
	}
	return &h.freelarge
}

func (h *mheap) busyList(npages uintptr) *mSpanList {
	if npages < uintptr(len(h.free)) {
		return &h.busy[npages]
	}
	return &h.busylarge
}

func scavengelist(list *mSpanList, now, limit uint64) uintptr {
	if sys.PhysPageSize > _PageSize {
		// golang.org/issue/9993
		// If the physical page size of the machine is larger than
		// our logical heap page size the kernel may round up the
		// amount to be freed to its page size and corrupt the heap
		// pages surrounding the unused block.
		return 0
	}

	if list.isEmpty() {
		return 0
	}

	var sumreleased uintptr
	for s := list.first; s != nil; s = s.next {
		if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
			released := (s.npages - s.npreleased) << _PageShift
			memstats.heap_released += uint64(released)
			sumreleased += released
			s.npreleased = s.npages
			sysUnused(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
		}
	}
	return sumreleased
}
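
// scavengelist releases a span's pages only when the span has sat unused for
// longer than limit and still has pages that were not returned earlier; the
// npreleased counter keeps each page from being counted (or released) twice.
// A worked example of the accounting, assuming the usual 8 KB heap page
// (the page size is an assumption here, not read from this file):
//
//	// A 16-page span with 6 pages already released:
//	//   released = (16 - 6) << 13 = 81920 bytes passed to sysUnused,
//	//   and npreleased becomes 16, so a later pass skips this span.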

func (h *mheap) scavenge(k int32, now, limit uint64) {
	lock(&h.lock)
	var sumreleased uintptr
	for i := 0; i < len(h.free); i++ {
		sumreleased += scavengelist(&h.free[i], now, limit)
	}
	sumreleased += scavengelist(&h.freelarge, now, limit)
	unlock(&h.lock)

	if debug.gctrace > 0 {
		if sumreleased > 0 {
			print("scvg", k, ": ", sumreleased>>20, " MB released\n")
		}
		// TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
		// But we can't call ReadMemStats on g0 holding locks.
		print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
	}
}

//go:linkname runtime_debug_freeOSMemory runtime/debug.freeOSMemory
func runtime_debug_freeOSMemory() {
	gcStart(gcForceBlockMode, false)
	systemstack(func() { mheap_.scavenge(-1, ^uint64(0), 0) })
}
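
// runtime_debug_freeOSMemory backs the exported runtime/debug.FreeOSMemory:
// it forces a blocking collection and then scavenges every free list with no
// age limit. From user code the path is reached as in the commented program
// below (an ordinary Go program, shown here only as a usage sketch):
//
//	package main
//
//	import "runtime/debug"
//
//	func main() {
//		// Forces a GC and asks the runtime to return as much
//		// memory to the operating system as possible.
//		debug.FreeOSMemory()
//	}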

// Initialize a new span with the given start and npages.
func (span *mspan) init(start pageID, npages uintptr) {
	span.next = nil
	span.prev = nil
	span.list = nil
	span.start = start
	span.npages = npages
	span.freelist = 0
	span.ref = 0
	span.sizeclass = 0
	span.incache = false
	span.elemsize = 0
	span.state = _MSpanDead
	span.unusedsince = 0
	span.npreleased = 0
	span.speciallock.key = 0
	span.specials = nil
	span.needzero = 0
}

func (span *mspan) inList() bool {
	return span.prev != nil
}

// Initialize an empty doubly-linked list.
func (list *mSpanList) init() {
	list.first = nil
	list.last = &list.first
}

func (list *mSpanList) remove(span *mspan) {
	if span.prev == nil || span.list != list {
		println("failed MSpanList_Remove", span, span.prev, span.list, list)
		throw("MSpanList_Remove")
	}
	if span.next != nil {
		span.next.prev = span.prev
	} else {
		// TODO: After we remove the span.list != list check above,
		// we could at least still check list.last == &span.next here.
		list.last = span.prev
	}
	*span.prev = span.next
	span.next = nil
	span.prev = nil
	span.list = nil
}

func (list *mSpanList) isEmpty() bool {
	return list.first == nil
}

func (list *mSpanList) insert(span *mspan) {
	if span.next != nil || span.prev != nil || span.list != nil {
		println("failed MSpanList_Insert", span, span.next, span.prev, span.list)
		throw("MSpanList_Insert")
	}
	span.next = list.first
	if list.first != nil {
		list.first.prev = &span.next
	} else {
		list.last = &span.next
	}
	list.first = span
	span.prev = &list.first
	span.list = list
}

func (list *mSpanList) insertBack(span *mspan) {
	if span.next != nil || span.prev != nil || span.list != nil {
		println("failed MSpanList_InsertBack", span, span.next, span.prev, span.list)
		throw("MSpanList_InsertBack")
	}
	span.next = nil
	span.prev = list.last
	*list.last = span
	list.last = &span.next
	span.list = list
}
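
// mSpanList links spans through a *next pointer plus a **prev "address of
// whatever points at me", and keeps last as the address of the pointer to
// update on append. That is why remove never special-cases the head and
// insertBack never special-cases an empty list. The commented sketch below
// shows the same pattern on a self-contained node type (illustrative only,
// not part of the runtime):
//
//	type node struct {
//		next *node
//		prev **node // address of the pointer (head or a next field) that points here
//	}
//
//	type nodeList struct {
//		first *node
//		last  **node // address of the pointer to overwrite on insertBack
//	}
//
//	func (l *nodeList) init() { l.first, l.last = nil, &l.first }
//
//	func (l *nodeList) insertBack(n *node) {
//		n.prev = l.last
//		*l.last = n
//		l.last = &n.next
//	}
//
//	func (l *nodeList) remove(n *node) {
//		if n.next != nil {
//			n.next.prev = n.prev
//		} else {
//			l.last = n.prev
//		}
//		*n.prev = n.next
//		n.next, n.prev = nil, nil
//	}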

const (
	_KindSpecialFinalizer = 1
	_KindSpecialProfile   = 2
	// Note: The finalizer special must be first because if we're freeing
	// an object, a finalizer special will cause the freeing operation
	// to abort, and we want to keep the other special records around
	// if that happens.
)

type special struct {
	next   *special // linked list in span
	offset uint16   // span offset of object
	kind   byte     // kind of special
}

// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
// Returns true if the special was successfully added, false otherwise.
// (The add will fail only if a record with the same p and s->kind
// already exists.)
func addspecial(p unsafe.Pointer, s *special) bool {
	span := mheap_.lookupMaybe(p)
	if span == nil {
		throw("addspecial on invalid pointer")
	}

	// Ensure that the span is swept.
	// Sweeping accesses the specials list w/o locks, so we have
	// to synchronize with it. And it's just much safer.
	mp := acquirem()
	span.ensureSwept()

	offset := uintptr(p) - uintptr(span.start<<_PageShift)
	kind := s.kind

	lock(&span.speciallock)

	// Find splice point, check for existing record.
	t := &span.specials
	for {
		x := *t
		if x == nil {
			break
		}
		if offset == uintptr(x.offset) && kind == x.kind {
			unlock(&span.speciallock)
			releasem(mp)
			return false // already exists
		}
		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
			break
		}
		t = &x.next
	}

	// Splice in record, fill in offset.
	s.offset = uint16(offset)
	s.next = *t
	*t = s
	unlock(&span.speciallock)
	releasem(mp)

	return true
}
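
// addspecial keeps each span's specials list sorted by (offset, kind) and
// walks it with a **special, so the new record can be spliced in at t without
// tracking a separate previous node. The commented snippet below shows the
// same splice-pointer insertion on a plain sorted list of ints (illustrative
// only, not runtime code):
//
//	type item struct {
//		next *item
//		key  int
//	}
//
//	// insertSorted reports false if key is already present.
//	func insertSorted(head **item, key int) bool {
//		t := head
//		for *t != nil && (*t).key < key {
//			t = &(*t).next
//		}
//		if *t != nil && (*t).key == key {
//			return false // already exists
//		}
//		*t = &item{next: *t, key: key}
//		return true
//	}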

// Removes the Special record of the given kind for the object p.
// Returns the record if the record existed, nil otherwise.
// The caller must FixAlloc_Free the result.
func removespecial(p unsafe.Pointer, kind uint8) *special {
	span := mheap_.lookupMaybe(p)
	if span == nil {
		throw("removespecial on invalid pointer")
	}

	// Ensure that the span is swept.
	// Sweeping accesses the specials list w/o locks, so we have
	// to synchronize with it. And it's just much safer.
	mp := acquirem()
	span.ensureSwept()

	offset := uintptr(p) - uintptr(span.start<<_PageShift)

	lock(&span.speciallock)
	t := &span.specials
	for {
		s := *t
		if s == nil {
			break
		}
		// This function is used for finalizers only, so we don't check for
		// "interior" specials (p must be exactly equal to s->offset).
		if offset == uintptr(s.offset) && kind == s.kind {
			*t = s.next
			unlock(&span.speciallock)
			releasem(mp)
			return s
		}
		t = &s.next
	}
	unlock(&span.speciallock)
	releasem(mp)
	return nil
}

// The described object has a finalizer set for it.
type specialfinalizer struct {
	special special
	fn      *funcval
	nret    uintptr
	fint    *_type
	ot      *ptrtype
}

// Adds a finalizer to the object p. Returns true if it succeeded.
func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
	lock(&mheap_.speciallock)
	s := (*specialfinalizer)(mheap_.specialfinalizeralloc.alloc())
	unlock(&mheap_.speciallock)
	s.special.kind = _KindSpecialFinalizer
	s.fn = f
	s.nret = nret
	s.fint = fint
	s.ot = ot
	if addspecial(p, &s.special) {
		// This is responsible for maintaining the same
		// GC-related invariants as markrootSpans in any
		// situation where it's possible that markrootSpans
		// has already run but mark termination hasn't yet.
		if gcphase != _GCoff {
			_, base, _ := findObject(p)
			mp := acquirem()
			gcw := &mp.p.ptr().gcw
			// Mark everything reachable from the object
			// so it's retained for the finalizer.
			scanobject(uintptr(base), gcw)
			// Mark the finalizer itself, since the
			// special isn't part of the GC'd heap.
			scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw)
			if gcBlackenPromptly {
				gcw.dispose()
			}
			releasem(mp)
		}
		return true
	}

	// There was an old finalizer
	lock(&mheap_.speciallock)
	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
	unlock(&mheap_.speciallock)
	return false
}

// Removes the finalizer (if any) from the object p.
func removefinalizer(p unsafe.Pointer) {
	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
	if s == nil {
		return // there wasn't a finalizer to remove
	}
	lock(&mheap_.speciallock)
	mheap_.specialfinalizeralloc.free(unsafe.Pointer(s))
	unlock(&mheap_.speciallock)
}

// The described object is being heap profiled.
type specialprofile struct {
	special special
	b       *bucket
}

// Set the heap profile bucket associated with addr to b.
func setprofilebucket(p unsafe.Pointer, b *bucket) {
	lock(&mheap_.speciallock)
	s := (*specialprofile)(mheap_.specialprofilealloc.alloc())
	unlock(&mheap_.speciallock)
	s.special.kind = _KindSpecialProfile
	s.b = b
	if !addspecial(p, &s.special) {
		throw("setprofilebucket: profile already set")
	}
}

// Do whatever cleanup needs to be done to deallocate s. It has
// already been unlinked from the MSpan specials list.
func freespecial(s *special, p unsafe.Pointer, size uintptr) {
	switch s.kind {
	case _KindSpecialFinalizer:
		sf := (*specialfinalizer)(unsafe.Pointer(s))
		queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
		lock(&mheap_.speciallock)
		mheap_.specialfinalizeralloc.free(unsafe.Pointer(sf))
		unlock(&mheap_.speciallock)
	case _KindSpecialProfile:
		sp := (*specialprofile)(unsafe.Pointer(s))
		mProf_Free(sp.b, size)
		lock(&mheap_.speciallock)
		mheap_.specialprofilealloc.free(unsafe.Pointer(sp))
		unlock(&mheap_.speciallock)
	default:
		throw("bad special kind")
		panic("not reached")
	}
}